From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 5EC661FF136 for ; Mon, 20 Apr 2026 16:49:59 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2C77953CA; Mon, 20 Apr 2026 16:49:59 +0200 (CEST) Date: Mon, 20 Apr 2026 16:49:48 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= Subject: Re: [PATCH proxmox-backup v7 3/9] datastore: add move-namespace To: Hannes Laimer , pbs-devel@lists.proxmox.com References: <20260416171830.266553-1-h.laimer@proxmox.com> <20260416171830.266553-4-h.laimer@proxmox.com> In-Reply-To: <20260416171830.266553-4-h.laimer@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.17.0 (https://github.com/astroidmail/astroid) Message-Id: <1776694266.6hgr1v4evp.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1776696508082 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.054 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: 4UUM3LTM5SUMJSJYZMDXANXXNX3SBOAJ X-Message-ID-Hash: 4UUM3LTM5SUMJSJYZMDXANXXNX3SBOAJ X-MailFrom: f.gruenbichler@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On April 16, 2026 7:18 pm, Hannes Laimer wrote: > Relocate an entire namespace subtree (the given namespace, all child > namespaces, and their groups) to a new location within the same > datastore. >=20 > Groups that cannot be locked because another task is running are > deferred and retried once after all other groups have been processed. > Groups that still fail are reported and remain at the source so they > can be retried individually. >=20 > When merging is enabled and a source group already exists in the > target, the snapshots are merged provided ownership matches and > source snapshots are strictly newer. >=20 > An optional max-depth parameter limits how many levels of child > namespaces are included. When delete-source is true (the default), > empty source namespaces are removed after the move. >=20 > Signed-off-by: Hannes Laimer > --- > pbs-datastore/src/datastore.rs | 245 ++++++++++++++++++++++++++++++++- > 1 file changed, 244 insertions(+), 1 deletion(-) >=20 > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore= .rs > index f88c356e..536824c9 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -34,7 +34,7 @@ use pbs_api_types::{ > ArchiveType, Authid, BackupGroupDeleteStats, BackupNamespace, Backup= Type, ChunkOrder, > DataStoreConfig, DatastoreBackendConfig, DatastoreBackendType, Datas= toreFSyncLevel, > DatastoreTuning, GarbageCollectionCacheStats, GarbageCollectionStatu= s, MaintenanceMode, > - MaintenanceType, Operation, S3Statistics, UPID, > + MaintenanceType, Operation, S3Statistics, MAX_NAMESPACE_DEPTH, UPID, > }; > use pbs_config::s3::S3_CFG_TYPE_ID; > use pbs_config::{BackupLockGuard, ConfigVersionCache}; > @@ -1124,6 +1124,249 @@ impl DataStore { > Ok((removed_all_requested, stats)) > } > =20 > + /// Move a backup namespace (including all child namespaces and grou= ps) to a new location. > + /// > + /// Groups are moved one at a time. For each group, exclusive locks = on the group and all > + /// its snapshots (locked and moved in batches) ensure no concurrent= operations are active. Groups > + /// that cannot be locked (because a backup, verify, or other task i= s running) are deferred > + /// and retried once, then reported in the error. > + /// > + /// If the target namespace already exists, groups are moved into it= . Groups that fail > + /// (lock conflict, merge invariant violation, or I/O error) are ski= pped and reported at > + /// the end - they remain at the source and can be retried individua= lly. > + /// > + /// `max_depth` limits how many levels of child namespaces below `so= urce_ns` are included. > + /// `None` means unlimited (move the entire subtree). `Some(0)` move= s only the groups > + /// directly in `source_ns` (no child namespaces). > + /// > + /// When `delete_source` is true the source namespace directories ar= e removed after all > + /// groups have been moved. When false the (now empty) source namesp= aces are kept. > + /// > + /// When `merge_groups` is true and a source group already exists in= the target namespace, > + /// the snapshots are merged into the existing target group (provide= d ownership matches > + /// and source snapshots are strictly newer). When false the operati= on fails for that > + /// group. > + pub fn move_namespace( > + self: &Arc, > + source_ns: &BackupNamespace, > + target_ns: &BackupNamespace, > + max_depth: Option, > + delete_source: bool, > + merge_groups: bool, > + ) -> Result<(), Error> { > + if *OLD_LOCKING { > + bail!("move operations require new-style file-based locking"= ); > + } > + if source_ns.is_root() { > + bail!("cannot move root namespace"); > + } > + if source_ns =3D=3D target_ns { > + bail!("source and target namespace must be different"); > + } > + > + if !self.namespace_exists(source_ns) { > + bail!("source namespace '{source_ns}' does not exist"); > + } > + let target_parent =3D target_ns.parent(); > + if !target_ns.is_root() && !self.namespace_exists(&target_parent= ) { > + bail!("target namespace parent '{target_parent}' does not ex= ist"); > + } > + if source_ns.contains(target_ns).is_some() { > + bail!( > + "cannot move namespace '{source_ns}' into its own subtre= e (target: '{target_ns}')" > + ); > + } all of these are already done right before calling this, so should either be moved into a small helper to avoid deviating, or not done twice IMHO. we don't do anything inbetween other than forking a worker after all.. > + > + let all_source_ns: Vec =3D if let Some(depth) = =3D max_depth { > + ListNamespacesRecursive::new_max_depth(Arc::clone(self), sou= rce_ns.clone(), depth)? > + .collect::, Error>>()? > + } else { > + self.recursive_iter_backup_ns(source_ns.clone())? > + .collect::, Error>>()? > + }; this could just be ListNamespacesRecrusive::new_max_depth with the depth unwrapped to the max value if not set? > + > + let all_source_groups: Vec =3D all_source_ns > + .iter() > + .map(|ns| self.iter_backup_groups(ns.clone())) > + .collect::, Error>>()? > + .into_iter() > + .flatten() > + .collect::, Error>>()?; the only benefit of this is a single log line below, and the chances of conflicts might be lower if we iterate each namespace on its own, instead of all of them up front? > + > + let subtree_depth =3D all_source_ns > + .iter() > + .map(BackupNamespace::depth) > + .max() > + .map_or(0, |d| d - source_ns.depth()); > + if subtree_depth + target_ns.depth() > MAX_NAMESPACE_DEPTH { > + bail!( > + "move would exceed maximum namespace depth \ > + ({subtree_depth}+{} > {MAX_NAMESPACE_DEPTH})", > + target_ns.depth(), > + ); > + } this is basically crate::sync::check_namespace_depth_limit, should it move to BackupNamespace or some other place? > + > + let backend =3D self.backend()?; > + > + log::info!( > + "moving namespace '{source_ns}' -> '{target_ns}': {} namespa= ces, {} groups", > + all_source_ns.len(), > + all_source_groups.len(), > + ); > + > + // Create target local namespace directories upfront (covers emp= ty namespaces). > + for ns in &all_source_ns { > + let target_child =3D ns.map_prefix(source_ns, target_ns)?; > + std::fs::create_dir_all(self.namespace_path(&target_child)).= with_context(|| { > + format!("failed to create local dir for target namespace= '{target_child}'") > + })?; this could use Datastore::create_namespace , which does some extra checks and > + } > + > + // For S3: create namespace markers for all target namespaces. > + if let DatastoreBackend::S3(s3_client) =3D &backend { > + for ns in &all_source_ns { > + let target_child =3D ns.map_prefix(source_ns, target_ns)= ?; > + let object_key =3D crate::s3::object_key_from_path( > + &target_child.path(), > + NAMESPACE_MARKER_FILENAME, > + ) > + .context("invalid namespace marker object key")?; > + log::debug!("creating S3 namespace marker for '{target_c= hild}': {object_key:?}"); > + proxmox_async::runtime::block_on( > + s3_client.upload_no_replace_with_retry(object_key, B= ytes::from("")), > + ) > + .context("failed to create namespace marker on S3 backen= d")?; also does this for us > + } > + } > + > + // Move each group with per-group and per-snapshot locking. Grou= ps that cannot be > + // locked are deferred and retried once after all other groups h= ave been processed. > + let mut failed_groups: Vec<(BackupNamespace, String)> =3D Vec::n= ew(); > + let mut failed_ns: HashSet =3D HashSet::new(); > + let mut deferred: Vec<&BackupGroup> =3D Vec::new(); > + > + for group in &all_source_groups { > + if let Err(err) =3D > + self.lock_and_move_group(group, source_ns, target_ns, &b= ackend, merge_groups) > + { > + match err { > + MoveGroupError::Soft(err) =3D> { > + log::info!( > + "deferring group '{}' in '{}' - lock confli= ct, will retry: {err:#}", > + group.group(), > + group.backup_ns(), > + ); > + deferred.push(group); > + } > + MoveGroupError::Hard(err) =3D> { > + warn!( > + "failed to move group '{}' in '{}': {err:#}"= , > + group.group(), > + group.backup_ns(), > + ); > + failed_groups.push((group.backup_ns().clone(), g= roup.group().to_string())); > + failed_ns.insert(group.backup_ns().clone()); > + } > + } > + } > + } > + > + // Retry deferred groups once. > + if !deferred.is_empty() { > + log::info!("retrying {} deferred group(s)...", deferred.len(= )); > + for group in deferred { > + if let Err(err) =3D > + self.lock_and_move_group(group, source_ns, target_ns= , &backend, merge_groups) > + { > + let err =3D match err { > + MoveGroupError::Soft(err) | MoveGroupError::Hard= (err) =3D> err, > + }; > + warn!( > + "failed to move group '{}' in '{}' on retry: {er= r:#}", > + group.group(), > + group.backup_ns(), > + ); > + failed_groups.push((group.backup_ns().clone(), group= .group().to_string())); > + failed_ns.insert(group.backup_ns().clone()); > + } > + } > + } > + > + // Clean up source namespaces when requested. Process deepest-fi= rst so child > + // directories are removed before their parents. > + if delete_source { > + let moved_ns: HashSet<&BackupNamespace> =3D all_source_ns.it= er().collect(); this is confusingly named, these are the namespaces we *tried to* move, not necessarily the ones we actually *moved*. > + > + for ns in all_source_ns.iter().rev() { > + // Skip namespaces that still have groups from failed mo= ves or child > + // namespaces that were excluded by the depth limit. > + if failed_ns > + .iter() > + .any(|fns| fns =3D=3D ns || ns.contains(fns).is_some= ()) nit: fns is a bit opaque, maybe `failed` or `failed_ns`? should this log a warning that this NS was not cleaned up, and why? > + { > + continue; > + } > + let has_excluded_children =3D self > + .iter_backup_ns(ns.clone()) > + .ok() > + .into_iter() > + .flatten() > + .filter_map(|child| child.ok()) this could use iter_backup_ns_ok instead? > + .any(|child| !moved_ns.contains(&child)); this does a lot of nested iteration now.. what we actually want to check is if there are any namespaces left below, because if there are, we can't remove, either because - we didn't try to move all children in the first place (depth check) - we didn't manage to move all children we wanted to move (failed_ns, handled above) - new children were created in the meantime this check here handles the first and third case? > + if has_excluded_children { > + continue; > + } > + > + // Remove type subdirectories first (should be empty aft= er per-group renames), > + // then the namespace directory itself. Uses remove_dir = which fails on > + // non-empty directories, handling concurrent group crea= tion gracefully. > + let ns_path =3D self.namespace_path(ns); > + if let Ok(entries) =3D std::fs::read_dir(&ns_path) { > + for entry in entries.flatten() { > + if let Err(err) =3D std::fs::remove_dir(entry.pa= th()) { > + warn!( > + "failed to remove source directory {:?}:= {err}", > + entry.path(), > + ); > + } > + } > + } > + if let Err(err) =3D std::fs::remove_dir(&ns_path) { > + warn!("failed to remove source namespace directory {= ns_path:?}: {err}"); > + continue; > + } > + > + // Local directory removed - delete the S3 namespace ma= rker too. > + if let DatastoreBackend::S3(s3_client) =3D &backend { > + let object_key =3D > + crate::s3::object_key_from_path(&ns.path(), NAME= SPACE_MARKER_FILENAME) > + .context("invalid namespace marker object ke= y")?; > + log::debug!("deleting source S3 namespace marker for= '{ns}': {object_key:?}"); > + if let Err(err) =3D > + proxmox_async::runtime::block_on(s3_client.delet= e_object(object_key)) > + { > + warn!("failed to delete source S3 namespace mark= er for '{ns}': {err:#}"); > + } > + } we do have remove_namespace_recursive already, which is what is used by the delete_namespace API endpoint, and do not offer a way to just delete a single namespace with protection against removing empty child namespaces.. should we just use that here? maybe extending it with a delete_child_namespaces parameter, and setting that to false here and in pull, but true in the API endpoint? > + } > + } > + > + if !failed_groups.is_empty() { > + let group_list: Vec =3D failed_groups > + .iter() > + .map(|(ns, group)| format!("'{group}' in '{ns}'")) > + .collect(); > + bail!( > + "namespace move partially completed; {} group(s) could n= ot be moved \ > + and remain at source: {}", > + failed_groups.len(), > + group_list.join(", "), > + ); > + } > + > + Ok(()) > + } > + > /// Remove a complete backup group including all snapshots. > /// > /// Returns `BackupGroupDeleteStats`, containing the number of delet= ed snapshots > --=20 > 2.47.3 >=20 >=20 >=20 >=20 >=20 >=20