From mboxrd@z Thu Jan  1 00:00:00 1970
Date: Tue, 14 Oct 2025 14:42:37 +0200
From: Fabian Grünbichler
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
Message-Id: <1760442647.ep0kyrw32q.astroid@yuna.none>
In-Reply-To: <20251008134344.3512958-8-d.csapak@proxmox.com>
References: <20251008134344.3512958-1-d.csapak@proxmox.com>
 <20251008134344.3512958-8-d.csapak@proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v2 6/6] api: admin: datastore: implement streaming content api call

On October 8, 2025 3:43 pm, Dominik Csapak wrote:
> this is a new api call that utilizes `proxmox_router::Stream` to provide
> a streaming interface to querying the datastore content.
>
> This can be done when a client requests this api call with the
> `application/json-seq` Accept header.
>
> In contrast to the existing api calls, this one
> * returns all types of content items (namespaces, groups, snapshots; can
>   be filtered with a parameter)
> * iterates over them recursively (with the range that is given with the
>   parameter)
>
> The api call returns the data in the following order:
> * first all visible namespaces
> * then for each ns in order
>   * each group
>   * each snapshot
>
> This is done so that we can have a good way of building a tree view in
> the ui.
>
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
> changes from v1:
> * don't use a tokio::spawn around a 'spawn_blocking'
> * don't use async-stream, but proxmox-router::Stream
> * add new use statements from pbs_api_types in a new line
>   (makes the diff more readable..)
> * remove content type filter from api call (can be done later still)
> * return one level more for namespaces than given, otherwise
>   we'll list groups + snapshots for the levels, but not the existing
>   namespaces there
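as an aside for anyone else testing this while reviewing: the endpoint
only switches to streaming when the `application/json-seq` Accept header
is set, and the body then needs to be split into individual records
before it can be parsed. a minimal client-side sketch, assuming the
usual RFC 7464 framing for json-seq (each record prefixed with a record
separator byte 0x1E and terminated with a newline) - I didn't verify
that this matches what proxmox-router actually emits:

    use std::io::{BufRead, BufReader, Read};

    // split an application/json-seq body into JSON values. parse errors
    // are silently dropped here just to keep the sketch short.
    fn json_seq_records<R: Read>(body: R) -> impl Iterator<Item = serde_json::Value> {
        BufReader::new(body)
            .split(0x1e) // each record starts with an RS byte
            .filter_map(|chunk| chunk.ok())
            .filter(|chunk| !chunk.is_empty()) // the part before the first RS is empty
            // serde_json treats the trailing newline as ignorable whitespace
            .filter_map(|chunk| serde_json::from_slice::<serde_json::Value>(&chunk).ok())
    }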
>
>  src/api2/admin/datastore.rs | 175 +++++++++++++++++++++++++++++++-----
>  1 file changed, 155 insertions(+), 20 deletions(-)
>
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index 643d1694b..811ee8713 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -23,7 +23,7 @@ use proxmox_compression::zstd::ZstdEncoder;
>  use proxmox_log::LogContext;
>  use proxmox_router::{
>      http_err, list_subdirs_api_method, ApiHandler, ApiMethod, ApiResponseFuture, Permission,
> -    Router, RpcEnvironment, RpcEnvironmentType, SubdirMap,
> +    Record, Router, RpcEnvironment, RpcEnvironmentType, SubdirMap,
>  };
>  use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
>  use proxmox_schema::*;
> @@ -49,6 +49,7 @@ use pbs_api_types::{
>      PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE, PRIV_DATASTORE_READ,
>      PRIV_DATASTORE_VERIFY, PRIV_SYS_MODIFY, UPID, UPID_SCHEMA, VERIFICATION_OUTDATED_AFTER_SCHEMA,
>  };
> +use pbs_api_types::{DatastoreContent, NamespaceListItem};
>  use pbs_client::pxar::{create_tar, create_zip};
>  use pbs_config::CachedUserInfo;
>  use pbs_datastore::backup_info::BackupInfo;
> @@ -70,7 +71,10 @@ use proxmox_rest_server::{formatter, worker_is_active, WorkerTask};
>
>  use crate::api2::backup::optional_ns_param;
>  use crate::api2::node::rrd::create_value_from_rrd;
> -use crate::backup::{check_ns_privs_full, ListAccessibleBackupGroups, VerifyWorker, NS_PRIVS_OK};
> +use crate::backup::{
> +    can_access_any_namespace_below, check_ns_privs, check_ns_privs_full,
> +    ListAccessibleBackupGroups, VerifyWorker, NS_PRIVS_OK,
> +};
>  use crate::server::jobstate::{compute_schedule_status, Job, JobState};
>  use crate::tools::{backup_info_to_snapshot_list_item, get_all_snapshot_files, read_backup_index};
>
> @@ -387,7 +391,7 @@ pub async fn delete_snapshot(
>  }
>
>  #[api(
> -    serializing: true,
> +    stream: true,
>      input: {
>          properties: {
>              store: { schema: DATASTORE_SCHEMA },
> @@ -395,40 +399,125 @@ pub async fn delete_snapshot(
>                  type: BackupNamespace,
>                  optional: true,
>              },
> -            "backup-type": {
> -                optional: true,
> -                type: BackupType,
> -            },
> -            "backup-id": {
> +            "max-depth": {
> +                schema: NS_MAX_DEPTH_SCHEMA,
>                  optional: true,
> -                schema: BACKUP_ID_SCHEMA,
>              },
>          },
>      },
> -    returns: pbs_api_types::ADMIN_DATASTORE_LIST_SNAPSHOTS_RETURN_TYPE,
>      access: {
>          permission: &Permission::Anybody,
>          description: "Requires on /datastore/{store}[/{namespace}] either DATASTORE_AUDIT for any \
>              or DATASTORE_BACKUP and being the owner of the group",
>      },
>  )]
> -/// List backup snapshots.
> -pub async fn list_snapshots(
> +/// List datastore content, recursively through all namespaces.

this could definitely be expanded a bit ;)

> +pub async fn list_content(
>      store: String,
>      ns: Option<BackupNamespace>,
> -    backup_type: Option<BackupType>,
> -    backup_id: Option<String>,
> +    max_depth: Option<usize>,
>      _param: Value,
>      _info: &ApiMethod,
>      rpcenv: &mut dyn RpcEnvironment,
> -) -> Result<Vec<SnapshotListItem>, Error> {
> +) -> Result<proxmox_router::Stream, Error> {
> +    let (sender, receiver) = tokio::sync::mpsc::channel(128);
> +
>      let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let user_info = CachedUserInfo::new()?;
>
> -    tokio::task::spawn_blocking(move || unsafe {
> -        list_snapshots_blocking(store, ns, backup_type, backup_id, auth_id)
> -    })
> -    .await
> -    .map_err(|err| format_err!("failed to await blocking task: {err}"))?
> +    let datastore = DataStore::lookup_datastore(&store, Some(Operation::Read))?;
> +
> +    // show one level of namespaces more than we use for group/snapshot list
> +    let list_max_depth = max_depth.map(|depth| depth + 1);

should this be guarded by a parameter? it is what we want for the UI (I
guess?), but it's a bit weird to do that unconditionally? also it needs
to be clamped, else if I pass the maximum value of 7 the call will choke
in checks further below in the call stack..
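if we keep the unconditional +1, clamping to the maximum namespace depth
should be enough to avoid that. untested sketch, using the
MAX_NAMESPACE_DEPTH constant from pbs_api_types:

    // show one level of namespaces more than we use for the group/snapshot
    // list, but never exceed the maximum depth accepted further down
    let list_max_depth = max_depth.map(|depth| (depth + 1).min(MAX_NAMESPACE_DEPTH));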
> +    if !can_access_any_namespace_below(

ah no, it doesn't choke - this just returns false in that case and we
bail with a 403..

> +        datastore.clone(),
> +        &auth_id,
> +        &user_info,
> +        ns.clone(),
> +        list_max_depth,
> +    ) {
> +        proxmox_router::http_bail!(FORBIDDEN, "permission check failed");

this effectively checks NS_PRIVS_OK

> +    }
> +
> +    let ns = ns.unwrap_or_default();
> +
> +    tokio::task::spawn_blocking(move || {
> +        for ns in datastore.recursive_iter_backup_ns_ok(ns.clone(), list_max_depth)? {
> +            match check_ns_privs(&store, &ns, &auth_id, NS_PRIVS_OK) {

and here we use NS_PRIVS_OK as well, so a namespace will be emitted if
the authid has *any of* PRIV_DATASTORE_MODIFY | PRIV_DATASTORE_READ |
PRIV_DATASTORE_BACKUP | PRIV_DATASTORE_AUDIT;

> +                Ok(_) => sender.blocking_send(Record::new(DatastoreContent::from(
> +                    NamespaceListItem { ns, comment: None },
> +                )))?,
> +                // don't disclose errors about ns privileges, since that can be an info leak
> +                Err(_) => continue,
> +            }
> +        }
> +
> +        for ns in datastore.recursive_iter_backup_ns_ok(ns, max_depth)? {

while this is not super expensive, we are effectively doing it twice? we
could just save the namespaces above (skipping those we don't want to
recurse into).. would also avoid returning inconsistent results if
namespaces are created/removed while we are streaming..

> +            let list_all = match check_ns_privs_full(
> +                &store,
> +                &ns,
> +                &auth_id,
> +                PRIV_DATASTORE_AUDIT,
> +                PRIV_DATASTORE_BACKUP,
> +            ) {
> +                Ok(requires_owner) => !requires_owner,
> +                // don't disclose errors about ns privileges, since that can be an info leak
> +                Err(_) => continue,

but here we abort listing the namespace *contents* and just skip it
silently, if the authid doesn't have either AUDIT or BACKUP privs..
which means if I query this endpoint using READ or MODIFY (only), it
would look to me like the namespace is empty.. I think we should either
emit an error (if we reach this point, the user has privs on that
namespace, just not the right ones), or we should use consistent privs
above when deciding whether to tell the caller about the namespace in
the first place..

> +            };
> +            for group in datastore.iter_backup_groups(ns.clone())? {

should this Error here also be emitted? it effectively can only happen
if we fail to read this namespace directory..
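i.e., instead of bubbling it up with `?` and tearing down the whole
stream, we could emit it just like the per-group errors below. untested
sketch:

    // turn a failure to open the namespace dir into an error record for
    // this namespace, instead of aborting the whole stream
    let groups = match datastore.iter_backup_groups(ns.clone()) {
        Ok(groups) => groups,
        Err(err) => {
            sender.blocking_send(Record::error(err))?;
            continue;
        }
    };
    for group in groups {
        // ... per-group handling as below ...
    }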
> +                let group = match group {
> +                    Ok(group) => group,
> +                    Err(err) => {
> +                        sender.blocking_send(Record::error(err))?;
> +                        continue;
> +                    }
> +                };
> +                let group = backup_group_to_group_list_item(
> +                    datastore.clone(),
> +                    group,
> +                    &ns,
> +                    &auth_id,
> +                    list_all,
> +                );
> +
> +                if let Some(group) = group {
> +                    sender
> +                        .blocking_send(Record::new(DatastoreContent::from((ns.clone(), group))))?;
> +                }
> +            }
> +
> +            for group in datastore.iter_backup_groups(ns.clone())? {

same question as above here as well, we iterate groups twice now - this
is no longer quite as cheap, and the risk of inconsistency is even
higher.. effectively we have to trade memory overhead and consistency
(iterate once and remember) vs syscall overhead and inconsistency
(iterate twice)

> +                let group = match group {
> +                    Ok(group) => group,
> +                    Err(err) => {
> +                        sender.blocking_send(Record::error(err))?;
> +                        continue;
> +                    }
> +                };
> +                let owner = match get_group_owner(&store, &ns, &group) {
> +                    Some(auth_id) => auth_id,
> +                    None => continue,
> +                };

if we remember which groups we emitted above, instead of re-iterating
here, then maybe we don't need to re-check here. but if we re-iterate,
we should also re-check I guess, like Wolfgang said..

but I'd like to clarify another point - the "legacy" code path here
needs the owner in the snapshot list because we effectively re-create
the groups from it.. but here, we could drop it - all snapshots within a
group are always owned by the owner of the group, which we emit earlier
on (if we don't iterate twice with potentially inconsistent results). I
guess the same could be argued w.r.t. the namespace fields in groups and
snapshots, which could be replaced by a single record that tells us that
the groups/snapshots following it are part of that namespace.. maybe
doesn't buy us much if we have to reconstruct the full records on the
client side for $reasons, but wanted to raise it nevertheless..

> +                for backup_dir in group.iter_snapshots()? {

if this fails, it likely means the whole group got removed, or the FS
permissions are somehow wrong, we shouldn't fail the whole contents call
in that case?

> +                    let backup_info = match backup_dir {
> +                        Ok(snap) => BackupInfo::new(snap)?,

same logic applies here..

> +                        Err(err) => {
> +                            sender.blocking_send(Record::error(err))?;
> +                            continue;
> +                        }
> +                    };
> +                    let snapshot = backup_info_to_snapshot_list_item(&backup_info, &owner);
> +                    sender.blocking_send(Record::new(DatastoreContent::from((
> +                        ns.clone(),
> +                        snapshot,
> +                    ))))?;
> +                }
> +            }
> +        }
> +        Ok::<_, Error>(())
> +    });

and all the errors are lost/ignored anyway, so the stream just stops at
whatever record we were at when the error occurred, with no indication
it's incomplete on either end?
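one way to at least signal that would be to keep the `?`s from escaping
the task, and turn a fatal error into a final record on the stream.
untested sketch:

    tokio::task::spawn_blocking(move || {
        // run the actual listing in an immediately-invoked closure, so a
        // fatal error ends up on the stream instead of only in the
        // ignored JoinHandle
        let result = (|| -> Result<(), Error> {
            // ... the namespace/group/snapshot loops from above ...
            Ok(())
        })();
        if let Err(err) = result {
            // best effort - if the receiver is gone, nobody is listening anyway
            let _ = sender.blocking_send(Record::error(err));
        }
    });

that still doesn't mark the end of a *successful* stream, but at least
the client could tell "ended with an error record" apart from "ended
cleanly".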
> +
> +    Ok(ReceiverStream::new(receiver).into())
> +}
>
>  fn get_group_owner(
> @@ -514,6 +603,51 @@ unsafe fn list_snapshots_blocking(
>      })
>  }
>
> +#[api(
> +    serializing: true,
> +    input: {
> +        properties: {
> +            store: { schema: DATASTORE_SCHEMA },
> +            ns: {
> +                type: BackupNamespace,
> +                optional: true,
> +            },
> +            "backup-type": {
> +                optional: true,
> +                type: BackupType,
> +            },
> +            "backup-id": {
> +                optional: true,
> +                schema: BACKUP_ID_SCHEMA,
> +            },
> +        },
> +    },
> +    returns: pbs_api_types::ADMIN_DATASTORE_LIST_SNAPSHOTS_RETURN_TYPE,
> +    access: {
> +        permission: &Permission::Anybody,
> +        description: "Requires on /datastore/{store}[/{namespace}] either DATASTORE_AUDIT for any \
> +            or DATASTORE_BACKUP and being the owner of the group",
> +    },
> +)]
> +/// List backup snapshots.
> +pub async fn list_snapshots(
> +    store: String,
> +    ns: Option<BackupNamespace>,
> +    backup_type: Option<BackupType>,
> +    backup_id: Option<String>,
> +    _param: Value,
> +    _info: &ApiMethod,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<SnapshotListItem>, Error> {
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +
> +    tokio::task::spawn_blocking(move || unsafe {
> +        list_snapshots_blocking(store, ns, backup_type, backup_id, auth_id)
> +    })
> +    .await
> +    .map_err(|err| format_err!("failed to await blocking task: {err}"))?
> +}
> +
>  async fn get_snapshots_count(
>      store: &Arc<DataStore>,
>      owner: Option<&Authid>,
> @@ -2764,6 +2898,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>          "change-owner",
>          &Router::new().post(&API_METHOD_SET_BACKUP_OWNER),
>      ),
> +    ("content", &Router::new().get(&API_METHOD_LIST_CONTENT)),
>      (
>          "download",
>          &Router::new().download(&API_METHOD_DOWNLOAD_FILE),
> -- 
> 2.47.3


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel