From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pbs-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id 4ECFF1FF142
	for <inbox@lore.proxmox.com>; Tue, 07 Apr 2026 15:18:58 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 79DF41B40F;
	Tue,  7 Apr 2026 15:19:31 +0200 (CEST)
Date: Tue, 07 Apr 2026 15:18:51 +0200
From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= <f.gruenbichler@proxmox.com>
Subject: Re: [PATCH proxmox-backup v6 3/8] datastore: add move_namespace
To: Hannes Laimer <h.laimer@proxmox.com>, pbs-devel@lists.proxmox.com
References: <20260331123409.198353-1-h.laimer@proxmox.com>
	<20260331123409.198353-4-h.laimer@proxmox.com>
In-Reply-To: <20260331123409.198353-4-h.laimer@proxmox.com>
MIME-Version: 1.0
User-Agent: astroid/0.17.0 (https://github.com/astroidmail/astroid)
Message-Id: <1775567352.horkmrssae.astroid@yuna.none>
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: quoted-printable
X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2
X-Bm-Transport-Timestamp: 1775567869294
X-SPAM-LEVEL: Spam detection results:  0
	AWL                     0.053 Adjusted score from AWL reputation of From:
 address
	BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
	DMARC_MISSING             0.1 Missing DMARC policy
	KAM_DMARC_STATUS         0.01 Test Rule for DKIM or SPF Failure with Strict
 Alignment
	RCVD_IN_VALIDITY_CERTIFIED_BLOCKED  0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked.  See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
	RCVD_IN_VALIDITY_RPBL_BLOCKED  0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked.  See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
	RCVD_IN_VALIDITY_SAFE_BLOCKED  0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked.  See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
	SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
	SPF_PASS               -0.001 SPF: sender matches SPF record
Message-ID-Hash: BEA633NE7TMHHYL57YVX5SOYO4OA4UQQ
X-Message-ID-Hash: BEA633NE7TMHHYL57YVX5SOYO4OA4UQQ
X-MailFrom: f.gruenbichler@proxmox.com
X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop;
 banned-address; emergency; member-moderation; nonmember-moderation;
 administrivia; implicit-dest; max-recipients; max-size; news-moderation;
 no-subject; digests; suspicious-header
X-Mailman-Version: 3.3.10
Precedence: list
List-Id: Proxmox Backup Server development discussion
 <pbs-devel.lists.proxmox.com>
List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help>
List-Owner: <mailto:pbs-devel-owner@lists.proxmox.com>
List-Post: <mailto:pbs-devel@lists.proxmox.com>
List-Subscribe: <mailto:pbs-devel-join@lists.proxmox.com>
List-Unsubscribe: <mailto:pbs-devel-leave@lists.proxmox.com>

On March 31, 2026 2:34 pm, Hannes Laimer wrote:
> Relocate an entire namespace subtree (the given namespace, all child
> namespaces, and their groups) to a new location within the same
> datastore.
>=20
> Groups are moved one at a time with per-group and per-snapshot
> exclusive locking to ensure no concurrent readers or writers are
> active on any snapshot being moved. Groups that cannot be locked
> (because a backup, verify, or other task is running) are deferred and
> retried once after all other groups have been processed - the
> conflicting task may have finished by then.
>=20
> Groups that still cannot be moved after the retry are reported in the
> task log and remain at the source so they can be retried with
> move_group individually. Source namespaces where all groups succeeded
> have their local directories (and S3 markers, if applicable) removed
> deepest-first.
>=20
> Signed-off-by: Hannes Laimer <h.laimer@proxmox.com>
> ---
>  pbs-datastore/src/backup_info.rs |   7 +-
>  pbs-datastore/src/datastore.rs   | 279 ++++++++++++++++++++++++++++++-
>  2 files changed, 284 insertions(+), 2 deletions(-)
>=20
> diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_=
info.rs
> index a8a56198..5e3f9202 100644
> --- a/pbs-datastore/src/backup_info.rs
> +++ b/pbs-datastore/src/backup_info.rs
> @@ -296,7 +296,12 @@ impl BackupGroup {
>          let src_path =3D self.full_group_path();
>          let target_path =3D self.store.group_path(target_ns, &self.group=
);
> =20
> -        log::info!("moving backup group {src_path:?} to {target_path:?}"=
);
> +        log::info!(
> +            "moving group '{}/{}' from '{}' to '{target_ns}'",
> +            self.group.ty,
> +            self.group.id,
> +            self.ns,
> +        );

nit: should be merged into previous patch I think

> =20
>          match backend {
>              DatastoreBackend::Filesystem =3D> {
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore=
.rs
> index d74e7e42..f49644b2 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -31,7 +31,7 @@ use pbs_api_types::{
>      ArchiveType, Authid, BackupGroupDeleteStats, BackupNamespace, Backup=
Type, ChunkOrder,
>      DataStoreConfig, DatastoreBackendConfig, DatastoreBackendType, Datas=
toreFSyncLevel,
>      DatastoreTuning, GarbageCollectionCacheStats, GarbageCollectionStatu=
s, MaintenanceMode,
> -    MaintenanceType, Operation, UPID,
> +    MaintenanceType, Operation, MAX_NAMESPACE_DEPTH, UPID,
>  };
>  use pbs_config::s3::S3_CFG_TYPE_ID;
>  use pbs_config::{BackupLockGuard, ConfigVersionCache};
> @@ -84,6 +84,29 @@ const S3_DELETE_BATCH_LIMIT: usize =3D 100;
>  // max defer time for s3 batch deletions
>  const S3_DELETE_DEFER_LIMIT_SECONDS: Duration =3D Duration::from_secs(60=
 * 5);
> =20
> +/// Error from a per-group move attempt inside `move_namespace`, disting=
uishing lock conflicts
> +/// (retryable - the conflicting task may finish) from hard errors (not =
retryable).
> +struct MoveGroupError {
> +    source: Error,
> +    is_lock_conflict: bool,
> +}
> +
> +impl MoveGroupError {
> +    fn lock(source: Error) -> Self {
> +        Self {
> +            source,
> +            is_lock_conflict: true,
> +        }
> +    }
> +
> +    fn hard(source: Error) -> Self {
> +        Self {
> +            source,
> +            is_lock_conflict: false,
> +        }
> +    }
> +}
> +
>  /// checks if auth_id is owner, or, if owner is a token, if
>  /// auth_id is the user of the token
>  pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<()=
, Error> {
> @@ -1000,6 +1023,203 @@ impl DataStore {
>          Ok((removed_all_requested, stats))
>      }
> =20
> +    /// Move a backup namespace (including all child namespaces and grou=
ps) to a new location.
> +    ///
> +    /// Groups are moved one at a time with per-group and per-snapshot e=
xclusive locking to
> +    /// ensure no concurrent readers or writers are active on any snapsh=
ot being moved. Groups
> +    /// that cannot be locked (because a backup, verify, or other task i=
s running) are skipped
> +    /// and reported in the error. The same iterative semantics apply to=
 both the filesystem
> +    /// and S3 backends.
> +    ///
> +    /// Fails if:
> +    /// - `source_ns` is the root namespace
> +    /// - `source_ns` =3D=3D `target_ns`
> +    /// - `source_ns` does not exist
> +    /// - `target_ns` already exists (to prevent silent merging)
> +    /// - `target_ns`'s parent does not exist
> +    /// - `source_ns` is an ancestor of `target_ns`
> +    /// - the move would exceed the maximum namespace depth
> +    pub fn move_namespace(
> +        self: &Arc<Self>,
> +        source_ns: &BackupNamespace,
> +        target_ns: &BackupNamespace,
> +    ) -> Result<(), Error> {
> +        if source_ns.is_root() {
> +            bail!("cannot move root namespace");
> +        }
> +        if source_ns =3D=3D target_ns {
> +            bail!("source and target namespace must be different");
> +        }
> +
> +        if !self.namespace_exists(source_ns) {
> +            bail!("source namespace '{source_ns}' does not exist");
> +        }
> +        if self.namespace_exists(target_ns) {
> +            bail!("target namespace '{target_ns}' already exists");
> +        }

what if I want to move all groups from A to B, even if B already exists?
I think this is a nice feature, and while this series allows me to
manually do that by moving each group individually, it would also be
helpful to allow moving them in bulk?

you basically get "move namespace" from "move namespace contents +
remove namespace if successful", so IMHO it would make sense to expose
the former as the latter (e.g., by having a boolean flag for "remove
source namespace", similar to how we handle moving disks of PVE guests)

> +        let target_parent =3D target_ns.parent();
> +        if !self.namespace_exists(&target_parent) {
> +            bail!("target namespace parent '{target_parent}' does not ex=
ist");
> +        }
> +        if source_ns.contains(target_ns).is_some() {
> +            bail!(
> +                "cannot move namespace '{source_ns}' into its own subtre=
e (target: '{target_ns}')"
> +            );
> +        }

if we make removal of the source NS optional, and add an optional
recursion limit then this might make sense again - but we can add that
part later ;)


we should assert new style locking up front here as well.

> +
> +        let all_source_ns: Vec<BackupNamespace> =3D self
> +            .recursive_iter_backup_ns(source_ns.clone())?
> +            .collect::<Result<Vec<_>, Error>>()?;
> +
> +        let all_source_groups: Vec<BackupGroup> =3D all_source_ns
> +            .iter()
> +            .map(|ns| self.iter_backup_groups(ns.clone()))
> +            .collect::<Result<Vec<_>, Error>>()?
> +            .into_iter()
> +            .flatten()
> +            .collect::<Result<Vec<_>, Error>>()?;
> +
> +        let subtree_depth =3D all_source_ns
> +            .iter()
> +            .map(BackupNamespace::depth)
> +            .max()
> +            .map_or(0, |d| d - source_ns.depth());
> +        if subtree_depth + target_ns.depth() > MAX_NAMESPACE_DEPTH {
> +            bail!(
> +                "move would exceed maximum namespace depth \
> +                ({subtree_depth}+{} > {MAX_NAMESPACE_DEPTH})",
> +                target_ns.depth(),
> +            );
> +        }
> +
> +        let backend =3D self.backend()?;
> +
> +        log::info!(
> +            "moving namespace '{source_ns}' -> '{target_ns}': {} namespa=
ces, {} groups",
> +            all_source_ns.len(),
> +            all_source_groups.len(),
> +        );
> +
> +        // Create target local namespace directories upfront (covers emp=
ty namespaces).
> +        for ns in &all_source_ns {
> +            let target_child =3D ns.map_prefix(source_ns, target_ns)?;
> +            std::fs::create_dir_all(self.namespace_path(&target_child)).=
with_context(|| {
> +                format!("failed to create local dir for target namespace=
 '{target_child}'")
> +            })?;
> +        }
> +
> +        // For S3: create namespace markers for all target namespaces.
> +        if let DatastoreBackend::S3(s3_client) =3D &backend {
> +            for ns in &all_source_ns {
> +                let target_child =3D ns.map_prefix(source_ns, target_ns)=
?;
> +                let object_key =3D crate::s3::object_key_from_path(
> +                    &target_child.path(),
> +                    NAMESPACE_MARKER_FILENAME,
> +                )
> +                .context("invalid namespace marker object key")?;
> +                log::debug!("creating S3 namespace marker for '{target_c=
hild}': {object_key:?}");
> +                proxmox_async::runtime::block_on(
> +                    s3_client.upload_no_replace_with_retry(object_key, B=
ytes::from("")),
> +                )
> +                .context("failed to create namespace marker on S3 backen=
d")?;
> +            }
> +        }
> +
> +        // Move each group with per-group and per-snapshot locking. Grou=
ps that cannot be
> +        // locked (concurrent operation in progress) are deferred and re=
tried once after all
> +        // other groups have been processed - the conflicting task may h=
ave finished by then.
> +        let mut failed_groups: Vec<(BackupNamespace, String)> =3D Vec::n=
ew();
> +        let mut failed_ns: HashSet<BackupNamespace> =3D HashSet::new();
> +        let mut deferred: Vec<&BackupGroup> =3D Vec::new();
> +
> +        for group in &all_source_groups {
> +            if let Err(err) =3D self.lock_and_move_group(group, source_n=
s, target_ns, &backend) {
> +                if err.is_lock_conflict {
> +                    log::info!(
> +                        "deferring group '{}' in '{}' - lock conflict, w=
ill retry: {:#}",
> +                        group.group(),
> +                        group.backup_ns(),
> +                        err.source,
> +                    );
> +                    deferred.push(group);
> +                } else {
> +                    warn!(
> +                        "failed to move group '{}' in '{}': {:#}",
> +                        group.group(),
> +                        group.backup_ns(),
> +                        err.source,
> +                    );
> +                    failed_groups.push((group.backup_ns().clone(), group=
.group().to_string()));
> +                    failed_ns.insert(group.backup_ns().clone());
> +                }
> +            }
> +        }
> +
> +        // Retry deferred groups once - conflicting tasks may have finis=
hed.
> +        if !deferred.is_empty() {
> +            log::info!("retrying {} deferred group(s)...", deferred.len(=
));
> +            for group in deferred {
> +                if let Err(err) =3D self.lock_and_move_group(group, sour=
ce_ns, target_ns, &backend) {
> +                    warn!(
> +                        "failed to move group '{}' in '{}' on retry: {:#=
}",
> +                        group.group(),
> +                        group.backup_ns(),
> +                        err.source,
> +                    );
> +                    failed_groups.push((group.backup_ns().clone(), group=
.group().to_string()));
> +                    failed_ns.insert(group.backup_ns().clone());
> +                }
> +            }
> +        }
> +
> +        // Clean up source namespaces that are now fully empty (all grou=
ps moved).
> +        // Process deepest-first so parent directories are already empty=
 when reached.
> +        for ns in all_source_ns.iter().rev() {
> +            // Skip if this namespace itself or any descendant still has=
 groups.
> +            let has_remaining =3D failed_ns
> +                .iter()
> +                .any(|fns| fns =3D=3D ns || ns.contains(fns).is_some());
> +            if has_remaining {
> +                continue;

this is a very incomplete check, as new groups could have been added
since we iterated above..

> +            }
> +
> +            // For S3: delete the source namespace marker.
> +            if let DatastoreBackend::S3(s3_client) =3D &backend {
> +                let object_key =3D
> +                    crate::s3::object_key_from_path(&ns.path(), NAMESPAC=
E_MARKER_FILENAME)
> +                        .context("invalid namespace marker object key")?=
;
> +                log::debug!("deleting source S3 namespace marker for '{n=
s}': {object_key:?}");
> +                proxmox_async::runtime::block_on(s3_client.delete_object=
(object_key))
> +                    .context("failed to delete source namespace marker o=
n S3 backend")?;
> +            }
> +
> +            // Remove the source local directory. Try type subdirectorie=
s first
> +            // (they should be empty after the per-group renames), then =
the namespace dir.
> +            let ns_path =3D self.namespace_path(ns);
> +            if let Ok(entries) =3D std::fs::read_dir(&ns_path) {
> +                for entry in entries.flatten() {
> +                    let _ =3D std::fs::remove_dir(entry.path());
> +                }
> +            }
> +            let _ =3D std::fs::remove_dir(&ns_path);

and then errors are swallowed silently here?

> +        }
> +
> +        if !failed_groups.is_empty() {
> +            let group_list: Vec<String> =3D failed_groups
> +                .iter()
> +                .map(|(ns, group)| format!("'{group}' in '{ns}'"))
> +                .collect();
> +            bail!(
> +                "namespace move partially completed; {} group(s) could n=
ot be moved \
> +                and remain at source: {}",
> +                failed_groups.len(),
> +                group_list.join(", "),
> +            );

and as a result this here is incomplete as well..

> +        }
> +
> +        Ok(())
> +    }
> +
>      /// Remove a complete backup group including all snapshots.
>      ///
>      /// Returns `BackupGroupDeleteStats`, containing the number of delet=
ed snapshots
> @@ -1070,6 +1290,63 @@ impl DataStore {
>          source_group.move_to(target_ns, &backend)
>      }
> =20
> +    /// Try to lock and move a single group as part of a namespace move.=
 Acquires exclusive
> +    /// locks on the group and all its snapshots, then performs the actu=
al move. Returns a
> +    /// `MoveGroupError` on failure, indicating whether the failure was =
a lock conflict
> +    /// (retryable) or a hard error.
> +    fn lock_and_move_group(
> +        self: &Arc<Self>,
> +        group: &BackupGroup,
> +        source_ns: &BackupNamespace,
> +        target_ns: &BackupNamespace,
> +        backend: &DatastoreBackend,
> +    ) -> Result<(), MoveGroupError> {
> +        let target_group_ns =3D group
> +            .backup_ns()
> +            .map_prefix(source_ns, target_ns)
> +            .map_err(MoveGroupError::hard)?;
> +
> +        // Acquire exclusive group lock - prevents new snapshot addition=
s/removals.
> +        let _group_lock =3D group.lock().map_err(MoveGroupError::lock)?;
> +
> +        // Acquire exclusive lock on each snapshot to ensure no concurre=
nt readers.
> +        let mut _snap_locks =3D Vec::new();
> +        for snap in group.iter_snapshots().map_err(MoveGroupError::hard)=
? {
> +            let snap =3D snap.map_err(MoveGroupError::hard)?;
> +            _snap_locks.push(snap.lock().map_err(MoveGroupError::lock)?)=
;
> +        }

same comments as for previous patch apply here as well..

> +
> +        // Ensure the target type directory exists before move_to rename=
s into it.
> +        std::fs::create_dir_all(self.type_path(&target_group_ns, group.g=
roup().ty))
> +            .map_err(|err| MoveGroupError::hard(err.into()))?;
> +
> +        // For S3: pre-create the target group directory with the source=
 owner so the
> +        // group is visible in the UI during the S3 copy. On success mov=
e_to removes
> +        // this directory and renames the source into its place. On fail=
ure it stays so
> +        // users can delete the group via the API (the owner file enable=
s the auth
> +        // check), which also cleans up orphaned S3 objects at that pref=
ix.

should we also do this for non-bulk moving?

> +        if matches!(backend, DatastoreBackend::S3(_)) {
> +            let target_group_path =3D self.group_path(&target_group_ns, =
group.group());
> +            std::fs::create_dir_all(&target_group_path)
> +                .map_err(|err| MoveGroupError::hard(err.into()))?;
> +            if let Ok(owner) =3D group.get_owner() {
> +                let target_group =3D
> +                    self.backup_group(target_group_ns.clone(), group.gro=
up().clone());
> +                if let Err(err) =3D target_group.set_owner(&owner, true)=
 {
> +                    warn!(
> +                        "move_namespace: failed to set owner for target =
group '{}' in '{}': {err:#}",
> +                        group.group(),
> +                        target_group_ns,
> +                    );
> +                }
> +            }
> +        }
> +
> +        group
> +            .move_to(&target_group_ns, backend)
> +            .map_err(MoveGroupError::hard)
> +    }
> +
>      /// Remove a backup directory including all content
>      pub fn remove_backup_dir(
>          self: &Arc<Self>,
> --=20
> 2.47.3
>=20
>=20
>=20
>=20
>=20
>=20