From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 4A5781FF137 for ; Tue, 31 Mar 2026 14:33:55 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C4A7C1BF51; Tue, 31 Mar 2026 14:34:22 +0200 (CEST) From: Hannes Laimer To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v6 3/8] datastore: add move_namespace Date: Tue, 31 Mar 2026 14:34:04 +0200 Message-ID: <20260331123409.198353-4-h.laimer@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260331123409.198353-1-h.laimer@proxmox.com> References: <20260331123409.198353-1-h.laimer@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1774960401106 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.084 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: KP2AJUWXNKLFL6OZZKDMW4FQPCOKPOH6 X-Message-ID-Hash: KP2AJUWXNKLFL6OZZKDMW4FQPCOKPOH6 X-MailFrom: h.laimer@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Relocate an entire namespace subtree (the given namespace, all child namespaces, and their groups) to a new location within the same datastore. 
Groups are moved one at a time with per-group and per-snapshot exclusive locking to ensure no concurrent readers or writers are active on any snapshot being moved. Groups that cannot be locked (because a backup, verify, or other task is running) are deferred and retried once after all other groups have been processed - the conflicting task may have finished by then. Groups that still cannot be moved after the retry are reported in the task log and remain at the source so they can be retried with move_group individually. Source namespaces where all groups succeeded have their local directories (and S3 markers, if applicable) removed deepest-first. Signed-off-by: Hannes Laimer --- pbs-datastore/src/backup_info.rs | 7 +- pbs-datastore/src/datastore.rs | 279 ++++++++++++++++++++++++++++++- 2 files changed, 284 insertions(+), 2 deletions(-) diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs index a8a56198..5e3f9202 100644 --- a/pbs-datastore/src/backup_info.rs +++ b/pbs-datastore/src/backup_info.rs @@ -296,7 +296,12 @@ impl BackupGroup { let src_path = self.full_group_path(); let target_path = self.store.group_path(target_ns, &self.group); - log::info!("moving backup group {src_path:?} to {target_path:?}"); + log::info!( + "moving group '{}/{}' from '{}' to '{target_ns}'", + self.group.ty, + self.group.id, + self.ns, + ); match backend { DatastoreBackend::Filesystem => { diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index d74e7e42..f49644b2 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -31,7 +31,7 @@ use pbs_api_types::{ ArchiveType, Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, ChunkOrder, DataStoreConfig, DatastoreBackendConfig, DatastoreBackendType, DatastoreFSyncLevel, DatastoreTuning, GarbageCollectionCacheStats, GarbageCollectionStatus, MaintenanceMode, - MaintenanceType, Operation, UPID, + MaintenanceType, Operation, MAX_NAMESPACE_DEPTH, UPID, }; use 
pbs_config::s3::S3_CFG_TYPE_ID; pbs_config::{BackupLockGuard, ConfigVersionCache}; @@ -84,6 +84,29 @@ const S3_DELETE_BATCH_LIMIT: usize = 100; // max defer time for s3 batch deletions const S3_DELETE_DEFER_LIMIT_SECONDS: Duration = Duration::from_secs(60 * 5); +/// Error from a per-group move attempt inside `move_namespace`, distinguishing lock conflicts +/// (retryable - the conflicting task may finish) from hard errors (not retryable). +struct MoveGroupError { + source: Error, + is_lock_conflict: bool, +} + +impl MoveGroupError { + fn lock(source: Error) -> Self { + Self { + source, + is_lock_conflict: true, + } + } + + fn hard(source: Error) -> Self { + Self { + source, + is_lock_conflict: false, + } + } +} + /// checks if auth_id is owner, or, if owner is a token, if /// auth_id is the user of the token pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error> { @@ -1000,6 +1023,203 @@ impl DataStore { Ok((removed_all_requested, stats)) } + /// Move a backup namespace (including all child namespaces and groups) to a new location. + /// + /// Groups are moved one at a time with per-group and per-snapshot exclusive locking to + /// ensure no concurrent readers or writers are active on any snapshot being moved. Groups + /// that cannot be locked (because a backup, verify, or other task is running) are deferred + /// and retried once; groups that still fail are reported in the error. The same iterative + /// semantics apply to both the filesystem and S3 backends. 
+ /// + /// Fails if: + /// - `source_ns` is the root namespace + /// - `source_ns` == `target_ns` + /// - `source_ns` does not exist + /// - `target_ns` already exists (to prevent silent merging) + /// - `target_ns`'s parent does not exist + /// - `source_ns` is an ancestor of `target_ns` + /// - the move would exceed the maximum namespace depth + pub fn move_namespace( + self: &Arc, + source_ns: &BackupNamespace, + target_ns: &BackupNamespace, + ) -> Result<(), Error> { + if source_ns.is_root() { + bail!("cannot move root namespace"); + } + if source_ns == target_ns { + bail!("source and target namespace must be different"); + } + + if !self.namespace_exists(source_ns) { + bail!("source namespace '{source_ns}' does not exist"); + } + if self.namespace_exists(target_ns) { + bail!("target namespace '{target_ns}' already exists"); + } + let target_parent = target_ns.parent(); + if !self.namespace_exists(&target_parent) { + bail!("target namespace parent '{target_parent}' does not exist"); + } + if source_ns.contains(target_ns).is_some() { + bail!( + "cannot move namespace '{source_ns}' into its own subtree (target: '{target_ns}')" + ); + } + + let all_source_ns: Vec = self + .recursive_iter_backup_ns(source_ns.clone())? + .collect::, Error>>()?; + + let all_source_groups: Vec = all_source_ns + .iter() + .map(|ns| self.iter_backup_groups(ns.clone())) + .collect::, Error>>()? 
+ .into_iter() + .flatten() + .collect::, Error>>()?; + + let subtree_depth = all_source_ns + .iter() + .map(BackupNamespace::depth) + .max() + .map_or(0, |d| d - source_ns.depth()); + if subtree_depth + target_ns.depth() > MAX_NAMESPACE_DEPTH { + bail!( + "move would exceed maximum namespace depth \ + ({subtree_depth}+{} > {MAX_NAMESPACE_DEPTH})", + target_ns.depth(), + ); + } + + let backend = self.backend()?; + + log::info!( + "moving namespace '{source_ns}' -> '{target_ns}': {} namespaces, {} groups", + all_source_ns.len(), + all_source_groups.len(), + ); + + // Create target local namespace directories upfront (covers empty namespaces). + for ns in &all_source_ns { + let target_child = ns.map_prefix(source_ns, target_ns)?; + std::fs::create_dir_all(self.namespace_path(&target_child)).with_context(|| { + format!("failed to create local dir for target namespace '{target_child}'") + })?; + } + + // For S3: create namespace markers for all target namespaces. + if let DatastoreBackend::S3(s3_client) = &backend { + for ns in &all_source_ns { + let target_child = ns.map_prefix(source_ns, target_ns)?; + let object_key = crate::s3::object_key_from_path( + &target_child.path(), + NAMESPACE_MARKER_FILENAME, + ) + .context("invalid namespace marker object key")?; + log::debug!("creating S3 namespace marker for '{target_child}': {object_key:?}"); + proxmox_async::runtime::block_on( + s3_client.upload_no_replace_with_retry(object_key, Bytes::from("")), + ) + .context("failed to create namespace marker on S3 backend")?; + } + } + + // Move each group with per-group and per-snapshot locking. Groups that cannot be + // locked (concurrent operation in progress) are deferred and retried once after all + // other groups have been processed - the conflicting task may have finished by then. 
+ let mut failed_groups: Vec<(BackupNamespace, String)> = Vec::new(); + let mut failed_ns: HashSet = HashSet::new(); + let mut deferred: Vec<&BackupGroup> = Vec::new(); + + for group in &all_source_groups { + if let Err(err) = self.lock_and_move_group(group, source_ns, target_ns, &backend) { + if err.is_lock_conflict { + log::info!( + "deferring group '{}' in '{}' - lock conflict, will retry: {:#}", + group.group(), + group.backup_ns(), + err.source, + ); + deferred.push(group); + } else { + warn!( + "failed to move group '{}' in '{}': {:#}", + group.group(), + group.backup_ns(), + err.source, + ); + failed_groups.push((group.backup_ns().clone(), group.group().to_string())); + failed_ns.insert(group.backup_ns().clone()); + } + } + } + + // Retry deferred groups once - conflicting tasks may have finished. + if !deferred.is_empty() { + log::info!("retrying {} deferred group(s)...", deferred.len()); + for group in deferred { + if let Err(err) = self.lock_and_move_group(group, source_ns, target_ns, &backend) { + warn!( + "failed to move group '{}' in '{}' on retry: {:#}", + group.group(), + group.backup_ns(), + err.source, + ); + failed_groups.push((group.backup_ns().clone(), group.group().to_string())); + failed_ns.insert(group.backup_ns().clone()); + } + } + } + + // Clean up source namespaces that are now fully empty (all groups moved). + // Process deepest-first so parent directories are already empty when reached. + for ns in all_source_ns.iter().rev() { + // Skip if this namespace itself or any descendant still has groups. + let has_remaining = failed_ns + .iter() + .any(|fns| fns == ns || ns.contains(fns).is_some()); + if has_remaining { + continue; + } + + // For S3: delete the source namespace marker. 
+ if let DatastoreBackend::S3(s3_client) = &backend { + let object_key = + crate::s3::object_key_from_path(&ns.path(), NAMESPACE_MARKER_FILENAME) + .context("invalid namespace marker object key")?; + log::debug!("deleting source S3 namespace marker for '{ns}': {object_key:?}"); + proxmox_async::runtime::block_on(s3_client.delete_object(object_key)) + .context("failed to delete source namespace marker on S3 backend")?; + } + + // Remove the source local directory. Try type subdirectories first + // (they should be empty after the per-group renames), then the namespace dir. + let ns_path = self.namespace_path(ns); + if let Ok(entries) = std::fs::read_dir(&ns_path) { + for entry in entries.flatten() { + let _ = std::fs::remove_dir(entry.path()); + } + } + let _ = std::fs::remove_dir(&ns_path); + } + + if !failed_groups.is_empty() { + let group_list: Vec = failed_groups + .iter() + .map(|(ns, group)| format!("'{group}' in '{ns}'")) + .collect(); + bail!( + "namespace move partially completed; {} group(s) could not be moved \ + and remain at source: {}", + failed_groups.len(), + group_list.join(", "), + ); + } + + Ok(()) + } + /// Remove a complete backup group including all snapshots. /// /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots @@ -1070,6 +1290,63 @@ impl DataStore { source_group.move_to(target_ns, &backend) } + /// Try to lock and move a single group as part of a namespace move. Acquires exclusive + /// locks on the group and all its snapshots, then performs the actual move. Returns a + /// `MoveGroupError` on failure, indicating whether the failure was a lock conflict + /// (retryable) or a hard error. 
+ fn lock_and_move_group( + self: &Arc, + group: &BackupGroup, + source_ns: &BackupNamespace, + target_ns: &BackupNamespace, + backend: &DatastoreBackend, + ) -> Result<(), MoveGroupError> { + let target_group_ns = group + .backup_ns() + .map_prefix(source_ns, target_ns) + .map_err(MoveGroupError::hard)?; + + // Acquire exclusive group lock - prevents new snapshot additions/removals. + let _group_lock = group.lock().map_err(MoveGroupError::lock)?; + + // Acquire exclusive lock on each snapshot to ensure no concurrent readers. + let mut _snap_locks = Vec::new(); + for snap in group.iter_snapshots().map_err(MoveGroupError::hard)? { + let snap = snap.map_err(MoveGroupError::hard)?; + _snap_locks.push(snap.lock().map_err(MoveGroupError::lock)?); + } + + // Ensure the target type directory exists before move_to renames into it. + std::fs::create_dir_all(self.type_path(&target_group_ns, group.group().ty)) + .map_err(|err| MoveGroupError::hard(err.into()))?; + + // For S3: pre-create the target group directory with the source owner so the + // group is visible in the UI during the S3 copy. On success move_to removes + // this directory and renames the source into its place. On failure it stays so + // users can delete the group via the API (the owner file enables the auth + // check), which also cleans up orphaned S3 objects at that prefix. 
+ if matches!(backend, DatastoreBackend::S3(_)) { + let target_group_path = self.group_path(&target_group_ns, group.group()); + std::fs::create_dir_all(&target_group_path) + .map_err(|err| MoveGroupError::hard(err.into()))?; + if let Ok(owner) = group.get_owner() { + let target_group = + self.backup_group(target_group_ns.clone(), group.group().clone()); + if let Err(err) = target_group.set_owner(&owner, true) { + warn!( + "move_namespace: failed to set owner for target group '{}' in '{}': {err:#}", + group.group(), + target_group_ns, + ); + } + } + } + + group + .move_to(&target_group_ns, backend) + .map_err(MoveGroupError::hard) + } + /// Remove a backup directory including all content pub fn remove_backup_dir( self: &Arc, -- 2.47.3