From mboxrd@z Thu Jan  1 00:00:00 1970
Date: Tue, 14 Oct 2025 14:42:37 +0200
From: Fabian Grünbichler
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
Message-Id: <1760442647.ep0kyrw32q.astroid@yuna.none>
In-Reply-To: <20251008134344.3512958-8-d.csapak@proxmox.com>
References: <20251008134344.3512958-1-d.csapak@proxmox.com>
 <20251008134344.3512958-8-d.csapak@proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v2 6/6] api: admin: datastore: implement streaming content api call

On October 8, 2025 3:43 pm, Dominik Csapak wrote:
> this is a new api call that utilizes `proxmox_router::Stream` to provide
> a streaming interface to querying the datastore content.
>
> This can be done when a client requests this api call with the
> `application/json-seq` Accept header.
>
> In contrast to the existing api calls, this one
> * returns all types of content items (namespaces, groups, snapshots; can
>   be filtered with a parameter)
> * iterates over them recursively (with the range that is given with the
>   parameter)
>
> The api call returns the data in the following order:
> * first all visible namespaces
> * then for each ns in order
>   * each group
>   * each snapshot
>
> This is done so that we can have a good way of building a tree view in
> the ui.
>
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
> changes from v1:
> * don't use a tokio::spawn around a 'spawn_blocking'
> * don't use async-stream, but proxmox-router::Stream
> * add new use statements from pbs_api_types in a new line
>   (makes the diff more readable..)
> * remove content type filter from api call (can be done later still)
> * return one level more for namespaces than given, otherwise
>   we'll list groups + snapshots for the levels, but not the existing
>   namespaces there
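as an aside for anyone else testing this while reviewing: the endpoint
only switches to streaming when the `application/json-seq` Accept header
is set, and the body then needs to be split into individual records
before it can be parsed. a minimal client-side sketch, assuming the
usual RFC 7464 framing for json-seq (each record prefixed with a record
separator byte 0x1E and terminated with a newline) - I didn't verify
that this matches what proxmox-router actually emits:

    use std::io::{BufRead, BufReader, Read};

    // split an application/json-seq body into JSON values. parse errors
    // are silently dropped here just to keep the sketch short.
    fn json_seq_records<R: Read>(body: R) -> impl Iterator<Item = serde_json::Value> {
        BufReader::new(body)
            .split(0x1e) // each record starts with an RS byte
            .filter_map(|chunk| chunk.ok())
            .filter(|chunk| !chunk.is_empty()) // the part before the first RS is empty
            // serde_json treats the trailing newline as ignorable whitespace
            .filter_map(|chunk| serde_json::from_slice::<serde_json::Value>(&chunk).ok())
    }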
>
>  src/api2/admin/datastore.rs | 175 +++++++++++++++++++++++++++++++-----
>  1 file changed, 155 insertions(+), 20 deletions(-)
>
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index 643d1694b..811ee8713 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -23,7 +23,7 @@ use proxmox_compression::zstd::ZstdEncoder;
>  use proxmox_log::LogContext;
>  use proxmox_router::{
>      http_err, list_subdirs_api_method, ApiHandler, ApiMethod, ApiResponseFuture, Permission,
> -    Router, RpcEnvironment, RpcEnvironmentType, SubdirMap,
> +    Record, Router, RpcEnvironment, RpcEnvironmentType, SubdirMap,
>  };
>  use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
>  use proxmox_schema::*;
> @@ -49,6 +49,7 @@ use pbs_api_types::{
>      PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE, PRIV_DATASTORE_READ,
>      PRIV_DATASTORE_VERIFY, PRIV_SYS_MODIFY, UPID, UPID_SCHEMA, VERIFICATION_OUTDATED_AFTER_SCHEMA,
>  };
> +use pbs_api_types::{DatastoreContent, NamespaceListItem};
>  use pbs_client::pxar::{create_tar, create_zip};
>  use pbs_config::CachedUserInfo;
>  use pbs_datastore::backup_info::BackupInfo;
> @@ -70,7 +71,10 @@ use proxmox_rest_server::{formatter, worker_is_active, WorkerTask};
>
>  use crate::api2::backup::optional_ns_param;
>  use crate::api2::node::rrd::create_value_from_rrd;
> -use crate::backup::{check_ns_privs_full, ListAccessibleBackupGroups, VerifyWorker, NS_PRIVS_OK};
> +use crate::backup::{
> +    can_access_any_namespace_below, check_ns_privs, check_ns_privs_full,
> +    ListAccessibleBackupGroups, VerifyWorker, NS_PRIVS_OK,
> +};
>  use crate::server::jobstate::{compute_schedule_status, Job, JobState};
>  use crate::tools::{backup_info_to_snapshot_list_item, get_all_snapshot_files, read_backup_index};
>
> @@ -387,7 +391,7 @@ pub async fn delete_snapshot(
>  }
>
>  #[api(
> -    serializing: true,
> +    stream: true,
>      input: {
>          properties: {
>              store: { schema: DATASTORE_SCHEMA },
> @@ -395,40 +399,125 @@ pub async fn delete_snapshot(
>                  type: BackupNamespace,
>                  optional: true,
>              },
> -            "backup-type": {
> -                optional: true,
> -                type: BackupType,
> -            },
> -            "backup-id": {
> +            "max-depth": {
> +                schema: NS_MAX_DEPTH_SCHEMA,
>                  optional: true,
> -                schema: BACKUP_ID_SCHEMA,
>              },
>          },
>      },
> -    returns: pbs_api_types::ADMIN_DATASTORE_LIST_SNAPSHOTS_RETURN_TYPE,
>      access: {
>          permission: &Permission::Anybody,
>          description: "Requires on /datastore/{store}[/{namespace}] either DATASTORE_AUDIT for any \
>              or DATASTORE_BACKUP and being the owner of the group",
>      },
>  )]
> -/// List backup snapshots.
> -pub async fn list_snapshots(
> +/// List datastore content, recursively through all namespaces.

this could definitely be expanded a bit ;)

> +pub async fn list_content(
>      store: String,
>      ns: Option<BackupNamespace>,
> -    backup_type: Option<BackupType>,
> -    backup_id: Option<String>,
> +    max_depth: Option<usize>,
>      _param: Value,
>      _info: &ApiMethod,
>      rpcenv: &mut dyn RpcEnvironment,
> -) -> Result<Vec<SnapshotListItem>, Error> {
> +) -> Result<proxmox_router::Stream, Error> {
> +    let (sender, receiver) = tokio::sync::mpsc::channel(128);
> +
>      let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let user_info = CachedUserInfo::new()?;
>
> -    tokio::task::spawn_blocking(move || unsafe {
> -        list_snapshots_blocking(store, ns, backup_type, backup_id, auth_id)
> -    })
> -    .await
> -    .map_err(|err| format_err!("failed to await blocking task: {err}"))?
> +    let datastore = DataStore::lookup_datastore(&store, Some(Operation::Read))?;
> +
> +    // show one level of namespaces more than we use for group/snapshot list
> +    let list_max_depth = max_depth.map(|depth| depth + 1);

should this be guarded by a parameter? it is what we want for the UI (I
guess?), but it's a bit weird to do that unconditionally? also it needs
to be clamped, else if I pass the maximum value of 7 the call will choke
in checks further below in the call stack..
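if we keep the unconditional +1, clamping to the maximum namespace depth
should be enough to avoid that. untested sketch, using the
MAX_NAMESPACE_DEPTH constant from pbs_api_types:

    // show one level of namespaces more than we use for the group/snapshot
    // list, but never exceed the maximum depth accepted further down
    let list_max_depth = max_depth.map(|depth| (depth + 1).min(MAX_NAMESPACE_DEPTH));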
> +    if !can_access_any_namespace_below(

ah no, it doesn't choke - this just returns false in that case and we
bail with a 403..

> +        datastore.clone(),
> +        &auth_id,
> +        &user_info,
> +        ns.clone(),
> +        list_max_depth,
> +    ) {
> +        proxmox_router::http_bail!(FORBIDDEN, "permission check failed");

this effectively checks NS_PRIVS_OK

> +    }
> +
> +    let ns = ns.unwrap_or_default();
> +
> +    tokio::task::spawn_blocking(move || {
> +        for ns in datastore.recursive_iter_backup_ns_ok(ns.clone(), list_max_depth)? {
> +            match check_ns_privs(&store, &ns, &auth_id, NS_PRIVS_OK) {

and here we use NS_PRIVS_OK as well, so a namespace will be emitted if
the authid has *any of* PRIV_DATASTORE_MODIFY | PRIV_DATASTORE_READ |
PRIV_DATASTORE_BACKUP | PRIV_DATASTORE_AUDIT;

> +                Ok(_) => sender.blocking_send(Record::new(DatastoreContent::from(
> +                    NamespaceListItem { ns, comment: None },
> +                )))?,
> +                // don't disclose errors about ns privileges, since that can be an info leak
> +                Err(_) => continue,
> +            }
> +        }
> +
> +        for ns in datastore.recursive_iter_backup_ns_ok(ns, max_depth)? {

while this is not super expensive, we are effectively doing it twice? we
could just save the namespaces above (skipping those we don't want to
recurse into).. would also avoid returning inconsistent results if
namespaces are created/removed while we are streaming..

> +            let list_all = match check_ns_privs_full(
> +                &store,
> +                &ns,
> +                &auth_id,
> +                PRIV_DATASTORE_AUDIT,
> +                PRIV_DATASTORE_BACKUP,
> +            ) {
> +                Ok(requires_owner) => !requires_owner,
> +                // don't disclose errors about ns privileges, since that can be an info leak
> +                Err(_) => continue,

but here we abort listing the namespace *contents* and just skip it
silently, if the authid doesn't have either AUDIT or BACKUP privs..
which means if I query this endpoint using READ or MODIFY (only), it
would look to me like the namespace is empty.. I think we should either
emit an error (if we reach this point, the user has privs on that
namespace, just not the right ones), or we should use consistent privs
above when deciding whether to tell the caller about the namespace in
the first place..

> +            };
> +            for group in datastore.iter_backup_groups(ns.clone())? {

should this Error here also be emitted? it effectively can only happen
if we fail to read this namespace directory..
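i.e., instead of bubbling it up with `?` and tearing down the whole
stream, we could emit it just like the per-group errors below. untested
sketch:

    // turn a failure to open the namespace dir into an error record for
    // this namespace, instead of aborting the whole stream
    let groups = match datastore.iter_backup_groups(ns.clone()) {
        Ok(groups) => groups,
        Err(err) => {
            sender.blocking_send(Record::error(err))?;
            continue;
        }
    };
    for group in groups {
        // ... per-group handling as below ...
    }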
> +                let group = match group {
> +                    Ok(group) => group,
> +                    Err(err) => {
> +                        sender.blocking_send(Record::error(err))?;
> +                        continue;
> +                    }
> +                };
> +                let group = backup_group_to_group_list_item(
> +                    datastore.clone(),
> +                    group,
> +                    &ns,
> +                    &auth_id,
> +                    list_all,
> +                );
> +
> +                if let Some(group) = group {
> +                    sender
> +                        .blocking_send(Record::new(DatastoreContent::from((ns.clone(), group))))?;
> +                }
> +            }
> +
> +            for group in datastore.iter_backup_groups(ns.clone())? {

same question as above here as well, we iterate groups twice now - this
is no longer quite as cheap, and the risk of inconsistency is even
higher.. effectively we have to trade memory overhead and consistency
(iterate once and remember) vs syscall overhead and inconsistency
(iterate twice)

> +                let group = match group {
> +                    Ok(group) => group,
> +                    Err(err) => {
> +                        sender.blocking_send(Record::error(err))?;
> +                        continue;
> +                    }
> +                };
> +                let owner = match get_group_owner(&store, &ns, &group) {
> +                    Some(auth_id) => auth_id,
> +                    None => continue,
> +                };

if we remember which groups we emitted above, instead of re-iterating
here, then maybe we don't need to re-check here. but if we re-iterate,
we should also re-check I guess, like Wolfgang said..

but I'd like to clarify another point - the "legacy" code path here
needs the owner in the snapshot list because we effectively re-create
the groups from it.. but here, we could drop it - all snapshots within a
group are always owned by the owner of the group, which we emit earlier
on (if we don't iterate twice with potentially inconsistent results). I
guess the same could be argued w.r.t. the namespace fields in groups and
snapshots, which could be replaced by a single record that tells us that
the groups/snapshots following it are part of that namespace.. maybe
doesn't buy us much if we have to reconstruct the full records on the
client side for $reasons, but wanted to raise it nevertheless..

> +                for backup_dir in group.iter_snapshots()? {

if this fails, it likely means the whole group got removed, or the FS
permissions are somehow wrong, we shouldn't fail the whole contents call
in that case?

> +                    let backup_info = match backup_dir {
> +                        Ok(snap) => BackupInfo::new(snap)?,

same logic applies here..

> +                        Err(err) => {
> +                            sender.blocking_send(Record::error(err))?;
> +                            continue;
> +                        }
> +                    };
> +                    let snapshot = backup_info_to_snapshot_list_item(&backup_info, &owner);
> +                    sender.blocking_send(Record::new(DatastoreContent::from((
> +                        ns.clone(),
> +                        snapshot,
> +                    ))))?;
> +                }
> +            }
> +        }
> +        Ok::<_, Error>(())
> +    });

and all the errors are lost/ignored anyway, so the stream just stops at
whatever record we were at when the error occurred, with no indication
it's incomplete on either end?
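one way to at least signal that would be to keep the `?`s from escaping
the task, and turn a fatal error into a final record on the stream.
untested sketch:

    tokio::task::spawn_blocking(move || {
        // run the actual listing in an immediately-invoked closure, so a
        // fatal error ends up on the stream instead of only in the
        // ignored JoinHandle
        let result = (|| -> Result<(), Error> {
            // ... the namespace/group/snapshot loops from above ...
            Ok(())
        })();
        if let Err(err) = result {
            // best effort - if the receiver is gone, nobody is listening anyway
            let _ = sender.blocking_send(Record::error(err));
        }
    });

that still doesn't mark the end of a *successful* stream, but at least
the client could tell "ended with an error record" apart from "ended
cleanly".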
> +
> +    Ok(ReceiverStream::new(receiver).into())
> +}
>
>  fn get_group_owner(
> @@ -514,6 +603,51 @@ unsafe fn list_snapshots_blocking(
>      })
>  }
>
> +#[api(
> +    serializing: true,
> +    input: {
> +        properties: {
> +            store: { schema: DATASTORE_SCHEMA },
> +            ns: {
> +                type: BackupNamespace,
> +                optional: true,
> +            },
> +            "backup-type": {
> +                optional: true,
> +                type: BackupType,
> +            },
> +            "backup-id": {
> +                optional: true,
> +                schema: BACKUP_ID_SCHEMA,
> +            },
> +        },
> +    },
> +    returns: pbs_api_types::ADMIN_DATASTORE_LIST_SNAPSHOTS_RETURN_TYPE,
> +    access: {
> +        permission: &Permission::Anybody,
> +        description: "Requires on /datastore/{store}[/{namespace}] either DATASTORE_AUDIT for any \
> +            or DATASTORE_BACKUP and being the owner of the group",
> +    },
> +)]
> +/// List backup snapshots.
> +pub async fn list_snapshots(
> +    store: String,
> +    ns: Option<BackupNamespace>,
> +    backup_type: Option<BackupType>,
> +    backup_id: Option<String>,
> +    _param: Value,
> +    _info: &ApiMethod,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<SnapshotListItem>, Error> {
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +
> +    tokio::task::spawn_blocking(move || unsafe {
> +        list_snapshots_blocking(store, ns, backup_type, backup_id, auth_id)
> +    })
> +    .await
> +    .map_err(|err| format_err!("failed to await blocking task: {err}"))?
> +}
> +
>  async fn get_snapshots_count(
>      store: &Arc<DataStore>,
>      owner: Option<&Authid>,
> @@ -2764,6 +2898,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>          "change-owner",
>          &Router::new().post(&API_METHOD_SET_BACKUP_OWNER),
>      ),
> +    ("content", &Router::new().get(&API_METHOD_LIST_CONTENT)),
>      (
>          "download",
>          &Router::new().download(&API_METHOD_DOWNLOAD_FILE),
> -- 
> 2.47.3


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel