Date: Tue, 14 Feb 2023 15:33:32 +0100
From: Fabian Grünbichler <f.gruenbichler@proxmox.com>
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup 2/4] pull: add logic for local pull
Message-Id: <1676378957.un5a3mf7ht.astroid@yuna.none>
In-Reply-To: <20230213154555.49610-3-h.laimer@proxmox.com>
References: <20230213154555.49610-1-h.laimer@proxmox.com> <20230213154555.49610-3-h.laimer@proxmox.com>

On February 13, 2023 4:45 pm, Hannes Laimer wrote:
> ... since creating a HttpClient (which would be needed
> to reuse existing pull logic) without a remote was not
> possible. This also improves the speed for local
> sync-jobs.
>
> Signed-off-by: Hannes Laimer <h.laimer@proxmox.com>

a high-level remark - for all the parts below where I suggest not going over
the API as root for local pulling, you need to properly filter the results by
the "owner" from PullParameters (or maybe that doesn't make sense for the
source side, and we need to pass in the "local" authid of the "caller" from
the API to replace the "remote" authid we currently get).
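for illustration, a rough sketch of such an owner filter for the local code
paths (the free-standing helper and its exact call site are made up here, but
DataStore::get_owner and check_backup_owner exist and are already used by
pull.rs):

use pbs_api_types::{Authid, BackupNamespace};
use pbs_datastore::{check_backup_owner, DataStore};

// sketch: keep only those groups of a local source that the authid
// configured for the sync job actually owns, instead of listing
// everything with root privileges
fn filter_by_owner(
    store: &DataStore,
    ns: &BackupNamespace,
    groups: Vec<pbs_api_types::BackupGroup>,
    owner: &Authid,
) -> Vec<pbs_api_types::BackupGroup> {
    groups
        .into_iter()
        .filter(|group| match store.get_owner(ns, group) {
            // compares the job owner against the group owner
            Ok(group_owner) => check_backup_owner(&group_owner, owner).is_ok(),
            Err(_) => false,
        })
        .collect()
}

whether that filtering belongs on the source or the target side is exactly the
open question above.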
there is a src/backup/hierarchy.rs module that might be of help for that ;)

> ---
>  pbs-client/src/backup_reader.rs |   5 +
>  src/api2/admin/datastore.rs     |  10 +
>  src/server/pull.rs              | 499 ++++++++++++++++++++------------
>  3 files changed, 335 insertions(+), 179 deletions(-)
>
> diff --git a/pbs-client/src/backup_reader.rs b/pbs-client/src/backup_reader.rs
> index 2cd4dc27..9dacef74 100644
> --- a/pbs-client/src/backup_reader.rs
> +++ b/pbs-client/src/backup_reader.rs
> @@ -20,6 +20,11 @@ use pbs_tools::sha::sha256;
>
>  use super::{H2Client, HttpClient};
>
> +pub enum BackupSource {
> +    Remote(Arc<BackupReader>),
> +    Local(pbs_datastore::BackupDir),

the Local variant could also take a SnapshotReader instead of BackupDir, like
pbs-tape does. not sure whether that would simplify/improve the code - did you
evaluate it?

this is also only used by server::pull, so it should likely go there.. the
name is also rather generic, something like `PullReader` or even just
`Reader`, since it's a pull-internal enum that doesn't need to be pub anyway.
but see below for some suggestions!

> +}
> +
>  /// Backup Reader
>  pub struct BackupReader {
>      h2: H2Client,
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index 8d3a6146..8ad78f29 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -445,6 +445,16 @@ pub async fn list_snapshots(
>      _param: Value,
>      _info: &ApiMethod,
>      rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<SnapshotListItem>, Error> {
> +    do_list_snapshots(store, ns, backup_type, backup_id, rpcenv).await
> +}
> +
> +pub async fn do_list_snapshots(
> +    store: String,
> +    ns: Option<BackupNamespace>,
> +    backup_type: Option<BackupType>,
> +    backup_id: Option<String>,
> +    rpcenv: &mut dyn RpcEnvironment,

this change is not needed (see below)

> +) -> Result<Vec<SnapshotListItem>, Error> {
>      let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
>
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index 65eedf2c..81df00c3 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -1,7 +1,7 @@
>  //! Sync datastore from remote server
>
>  use std::collections::{HashMap, HashSet};
> -use std::io::{Seek, SeekFrom};
> +use std::io::{Seek, SeekFrom, Write};
>  use std::sync::atomic::{AtomicUsize, Ordering};
>  use std::sync::{Arc, Mutex};
>  use std::time::SystemTime;
> @@ -9,6 +9,7 @@ use std::time::SystemTime;
>  use anyhow::{bail, format_err, Error};
>  use http::StatusCode;
>  use pbs_config::CachedUserInfo;
> +use pbs_datastore::read_chunk::AsyncReadChunk;
>  use serde_json::json;
>
>  use proxmox_router::HttpError;
> @@ -21,7 +22,7 @@ use pbs_api_types::{
>  };
>
>  use pbs_client::{
> -    BackupReader, BackupRepository, HttpClient, HttpClientOptions, RemoteChunkReader,
> +    BackupReader, BackupRepository, BackupSource, HttpClient, HttpClientOptions, RemoteChunkReader,
>  };
>  use pbs_datastore::data_blob::DataBlob;
>  use pbs_datastore::dynamic_index::DynamicIndexReader;
> @@ -30,7 +31,7 @@ use pbs_datastore::index::IndexFile;
>  use pbs_datastore::manifest::{
>      archive_type, ArchiveType, BackupManifest, FileInfo, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME,
>  };
> -use pbs_datastore::{check_backup_owner, DataStore, StoreProgress};
> +use pbs_datastore::{check_backup_owner, DataStore, LocalChunkReader, StoreProgress};
>  use pbs_tools::sha::sha256;
>  use proxmox_rest_server::WorkerTask;
>
> @@ -40,7 +41,7 @@ use crate::tools::parallel_handler::ParallelHandler;
>  /// Parameters for a pull operation.
>  pub(crate) struct PullParameters {
>      /// Remote that is pulled from
> -    remote: Remote,
> +    remote: Option<Remote>,
>      /// Full specification of remote datastore
>      source: BackupRepository,

might make more sense to refactor this type to properly differentiate between
remote source and local source? e.g., we could have

pub(crate) struct LocalSource {
    store: Arc<DataStore>,
    ns: BackupNamespace,
}

pub(crate) struct RemoteSource {
    remote: Remote,
    repo: BackupRepository,
    ns: BackupNamespace,
}

pub(crate) enum PullSource {
    Local(LocalSource),
    Remote(RemoteSource),
}

pub(crate) struct PullTarget {
    store: Arc<DataStore>,
    ns: BackupNamespace,
}

pub(crate) struct PullParameters {
    source: PullSource,
    target: PullTarget,
    /// Owner of synced groups (needs to match local owner of pre-existing groups)
    owner: Authid,
    /// Whether to remove groups which exist locally, but not on the remote end
    remove_vanished: bool,
    /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion)
    max_depth: Option<usize>,
    /// Filters for reducing the pull scope
    group_filter: Option<Vec<GroupFilter>>,
    /// Rate limits for all transfers from `remote`
    limit: RateLimitConfig,
}

and as a first refactoring step, just do this switch (without RemoteSource ;))
and drop the Client from the signatures.

just a suggestion, feel free to name/structure things differently if that
makes more sense!

>      /// Local store that is pulled into
> @@ -70,7 +71,7 @@ impl PullParameters {
>      pub(crate) fn new(
>          store: &str,
>          ns: BackupNamespace,
> -        remote: &str,
> +        remote: Option<&str>,
>          remote_store: &str,
>          remote_ns: BackupNamespace,

same here - could have a fn new_remote(..) and a fn new_local(..) to keep the
signatures and parameter names sensible..
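a rough sketch of how those two constructors could look (the parameter lists
just mirror the current new() and are illustrative, assuming the
PullSource/PullTarget split from above):

impl PullParameters {
    /// sketch: remote variant - look up the Remote config section and
    /// build a PullSource::Remote from it
    pub(crate) fn new_remote(
        remote: &str,
        remote_store: &str,
        remote_ns: BackupNamespace,
        store: &str,
        ns: BackupNamespace,
        owner: Authid,
        remove_vanished: Option<bool>,
        max_depth: Option<usize>,
        group_filter: Option<Vec<GroupFilter>>,
        limit: RateLimitConfig,
    ) -> Result<Self, Error> {
        todo!("lookup remote and build PullSource::Remote")
    }

    /// sketch: local variant - look up the source datastore and build a
    /// PullSource::Local; no remote and no rate limit needed here
    pub(crate) fn new_local(
        source_store: &str,
        source_ns: BackupNamespace,
        store: &str,
        ns: BackupNamespace,
        owner: Authid,
        remove_vanished: Option<bool>,
        max_depth: Option<usize>,
        group_filter: Option<Vec<GroupFilter>>,
    ) -> Result<Self, Error> {
        todo!("lookup datastore and build PullSource::Local")
    }
}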
>          owner: Authid,
> @@ -86,18 +87,24 @@ impl PullParameters {
>              remote_ns.check_max_depth(max_depth)?;
>          }
>
> -        let (remote_config, _digest) = pbs_config::remote::config()?;
> -        let remote: Remote = remote_config.lookup("remote", remote)?;
> +        let (remote, source): (Option<Remote>, BackupRepository) = if let Some(remote_str) = remote
> +        {
> +            let (remote_config, _digest) = pbs_config::remote::config()?;
> +            let remote = remote_config.lookup::<Remote>("remote", remote_str)?;
> +            let source = BackupRepository::new(
> +                Some(remote.config.auth_id.clone()),
> +                Some(remote.config.host.clone()),
> +                remote.config.port,
> +                remote_store.to_string(),
> +            );
> +            (Some(remote), source)
> +        } else {
> +            let source = BackupRepository::new(None, None, None, remote_store.to_string());
> +            (None, source)
> +        };
>
>          let remove_vanished = remove_vanished.unwrap_or(false);
>
> -        let source = BackupRepository::new(
> -            Some(remote.config.auth_id.clone()),
> -            Some(remote.config.host.clone()),
> -            remote.config.port,
> -            remote_store.to_string(),
> -        );
> -
>          Ok(Self {
>              remote,
>              remote_ns,
> @@ -114,13 +121,17 @@ impl PullParameters {
>
>      /// Creates a new [HttpClient] for accessing the [Remote] that is pulled from.
>      pub async fn client(&self) -> Result<HttpClient, Error> {
> -        crate::api2::config::remote::remote_client(&self.remote, Some(self.limit.clone())).await
> +        if let Some(remote) = &self.remote {
> +            crate::api2::config::remote::remote_client(remote, Some(self.limit.clone())).await
> +        } else {
> +            bail!("No remote specified. Do not use a HttpClient for a local sync.")
> +        }
>      }
>  }
>
> -async fn pull_index_chunks<I: IndexFile>(
> +async fn pull_index_chunks<I: IndexFile, C: AsyncReadChunk + Clone>(
>      worker: &WorkerTask,
> -    chunk_reader: RemoteChunkReader,
> +    chunk_reader: C,
>      target: Arc<DataStore>,
>      index: I,
>      downloaded_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
> @@ -256,10 +267,10 @@ fn verify_archive(info: &FileInfo, csum: &[u8; 32], size: u64) -> Result<(), Err
>  /// - Verify tmp file checksum
>  /// - if archive is an index, pull referenced chunks
>  /// - Rename tmp file into real path
> -async fn pull_single_archive(
> +async fn pull_single_archive<C: AsyncReadChunk + Clone>(
>      worker: &WorkerTask,
> -    reader: &BackupReader,
> -    chunk_reader: &mut RemoteChunkReader,
> +    source: &BackupSource,
> +    chunk_reader: C,
>      snapshot: &pbs_datastore::BackupDir,
>      archive_info: &FileInfo,
>      downloaded_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
> @@ -279,7 +290,15 @@ async fn pull_single_archive(
>          .read(true)
>          .open(&tmp_path)?;
>
> -    reader.download(archive_name, &mut tmpfile).await?;
> +    match source {
> +        BackupSource::Remote(reader) => reader.download(archive_name, &mut tmpfile).await?,
> +        BackupSource::Local(dir) => {
> +            let mut source_path = dir.full_path();
> +            source_path.push(archive_name);
> +            let data = std::fs::read(source_path)?;
> +            tmpfile.write_all(&data)?;

could use std::fs::copy()
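i.e., roughly (the helper is illustrative; std::fs::copy creates/truncates the
target itself and streams the contents instead of buffering the whole archive
in a Vec):

use std::io;
use std::path::Path;

// sketch: copy a local archive file straight to the tmp path
fn copy_local_archive(
    snapshot_path: &Path,
    archive_name: &str,
    tmp_path: &Path,
) -> io::Result<u64> {
    let source_path = snapshot_path.join(archive_name);
    // returns the number of bytes copied
    std::fs::copy(source_path, tmp_path)
}

that would also make the OpenOptions dance above unnecessary for the local
branch.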
> +        }
> +    };
>
>      match archive_type(archive_name)? {
>          ArchiveType::DynamicIndex => {
> @@ -289,14 +308,23 @@ async fn pull_single_archive(
>              let (csum, size) = index.compute_csum();
>              verify_archive(archive_info, &csum, size)?;
>
> -            pull_index_chunks(
> -                worker,
> -                chunk_reader.clone(),
> -                snapshot.datastore().clone(),
> -                index,
> -                downloaded_chunks,
> -            )
> -            .await?;
> +            match source {
> +                BackupSource::Local(ref dir)
> +                    if dir.datastore().name() == snapshot.datastore().name() =>
> +                {
> +                    task_log!(worker, "skipping chunk sync for same datatsore");
> +                }

not sure if this is a good idea - I think pulling locally should check that
the referenced chunks are all there and fail otherwise.. we can always
re-introduce skipping here (possibly opt-in?) later on if we think it's a good
performance improvement.

> +                _ => {
> +                    pull_index_chunks(
> +                        worker,
> +                        chunk_reader,
> +                        snapshot.datastore().clone(),
> +                        index,
> +                        downloaded_chunks,
> +                    )
> +                    .await?;
> +                }
> +            };
>          }
>          ArchiveType::FixedIndex => {
>              let index = FixedIndexReader::new(tmpfile).map_err(|err| {
> @@ -304,15 +332,23 @@ async fn pull_single_archive(
>              })?;
>              let (csum, size) = index.compute_csum();
>              verify_archive(archive_info, &csum, size)?;
> -
> -            pull_index_chunks(
> -                worker,
> -                chunk_reader.clone(),
> -                snapshot.datastore().clone(),
> -                index,
> -                downloaded_chunks,
> -            )
> -            .await?;
> +            match source {
> +                BackupSource::Local(ref dir)
> +                    if dir.datastore().name() == snapshot.datastore().name() =>
> +                {
> +                    task_log!(worker, "skipping chunk sync for same datatsore");
> +                }

same here

> +                _ => {
> +                    pull_index_chunks(
> +                        worker,
> +                        chunk_reader,
> +                        snapshot.datastore().clone(),
> +                        index,
> +                        downloaded_chunks,
> +                    )
> +                    .await?;
> +                }
> +            };
>          }
>          ArchiveType::Blob => {
>              tmpfile.seek(SeekFrom::Start(0))?;
> @@ -321,6 +357,9 @@ async fn pull_single_archive(
>          }
>      }
>      if let Err(err) = std::fs::rename(&tmp_path, &path) {
> +        task_log!(worker, "sync archive {}", archive_name);
> +        task_log!(worker, "tmpfile path {:?}", tmp_path.as_os_str());
> +        task_log!(worker, "path path {:?}", path.as_os_str());

left-over debug logs? ;)

>          bail!("Atomic rename file {:?} failed - {}", path, err);
>      }
>      Ok(())
> @@ -364,7 +403,7 @@ async fn try_client_log_download(
>  /// - Download log if not already existing
>  async fn pull_snapshot(
>      worker: &WorkerTask,
> -    reader: Arc<BackupReader>,
> +    source: BackupSource,
>      snapshot: &pbs_datastore::BackupDir,
>      downloaded_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
>  ) -> Result<(), Error> {
> @@ -377,31 +416,45 @@ async fn pull_snapshot(
>      let mut tmp_manifest_name = manifest_name.clone();
>      tmp_manifest_name.set_extension("tmp");

this whole part here could be refactored so that the new/refreshed manifest is
not stored in a tmpfile at all, but only kept in memory
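something along these lines (a sketch against the names used in this patch -
the 404/vanished handling for the remote case would stay as it is):

use anyhow::Error;
use pbs_client::BackupSource;
use pbs_datastore::data_blob::DataBlob;
use pbs_datastore::manifest::MANIFEST_BLOB_NAME;

// sketch: load the source manifest as an in-memory DataBlob instead of
// writing it to a .tmp file first
async fn load_source_manifest(source: &BackupSource) -> Result<DataBlob, Error> {
    match source {
        BackupSource::Remote(reader) => {
            let mut buf = Vec::new();
            reader.download(MANIFEST_BLOB_NAME, &mut buf).await?;
            DataBlob::load_from_reader(&mut &buf[..])
        }
        BackupSource::Local(dir) => dir.load_blob(MANIFEST_BLOB_NAME),
    }
}

the comparison with the already existing manifest then works on the blob
directly, and the "no data changes" path has nothing to clean up anymore.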
> -    let download_res = download_manifest(&reader, &tmp_manifest_name).await;
> -    let mut tmp_manifest_file = match download_res {
> -        Ok(manifest_file) => manifest_file,
> -        Err(err) => {
> -            match err.downcast_ref::<HttpError>() {
> -                Some(HttpError { code, message }) => match *code {
> -                    StatusCode::NOT_FOUND => {
> -                        task_log!(
> -                            worker,
> -                            "skipping snapshot {} - vanished since start of sync",
> -                            snapshot.dir(),
> -                        );
> -                        return Ok(());
> -                    }
> -                    _ => {
> -                        bail!("HTTP error {code} - {message}");
> -                    }
> -                },
> -                None => {
> -                    return Err(err);
> +    let tmp_manifest_blob = match source {
> +        BackupSource::Remote(ref reader) => {
> +            let mut tmp_manifest_file = match download_manifest(reader, &tmp_manifest_name).await {
> +                Ok(manifest_file) => manifest_file,
> +                Err(err) => {
> +                    match err.downcast_ref::<HttpError>() {
> +                        Some(HttpError { code, message }) => match *code {
> +                            StatusCode::NOT_FOUND => {
> +                                task_log!(
> +                                    worker,
> +                                    "skipping snapshot {} - vanished since start of sync",
> +                                    snapshot.dir(),
> +                                );
> +                                return Ok(());
> +                            }
> +                            _ => {
> +                                bail!("HTTP error {code} - {message}");
> +                            }
> +                        },
> +                        None => {
> +                            return Err(err);
> +                        }
> +                    };
>                  }
>              };
> +            DataBlob::load_from_reader(&mut tmp_manifest_file)?
> +        }
> +        BackupSource::Local(ref dir) => {
> +            let data = dir.load_blob(MANIFEST_BLOB_NAME)?;
> +            let mut tmp_manifest_file = std::fs::OpenOptions::new()
> +                .write(true)
> +                .create(true)
> +                .truncate(true)
> +                .read(true)
> +                .open(&tmp_manifest_name)?;
> +            tmp_manifest_file.write_all(data.raw_data())?;
> +            data
>          }
>      };
> -    let tmp_manifest_blob = DataBlob::load_from_reader(&mut tmp_manifest_file)?;
>
>      if manifest_name.exists() {
>          let manifest_blob = proxmox_lang::try_block!({
> @@ -417,13 +470,17 @@ async fn pull_snapshot(
>          })?;
>
>          if manifest_blob.raw_data() == tmp_manifest_blob.raw_data() {
> -            if !client_log_name.exists() {
> -                try_client_log_download(worker, reader, &client_log_name).await?;
> +            if let BackupSource::Remote(ref reader) = source {
> +                if !client_log_name.exists() {
> +                    try_client_log_download(worker, reader.clone(), &client_log_name).await?;
> +                };

logs should also be fetched for local operations..

> +            }
> +            if tmp_manifest_name.exists() {
> +                let _ = std::fs::remove_file(&tmp_manifest_name);
> +            }
>              task_log!(worker, "no data changes");
> -            let _ = std::fs::remove_file(&tmp_manifest_name);
>              return Ok(()); // nothing changed
> -        }
> +        };
>      }
>
>      let manifest = BackupManifest::try_from(tmp_manifest_blob)?;
> @@ -467,32 +524,49 @@ async fn pull_snapshot(
>          }
>      }
>
> -        let mut chunk_reader = RemoteChunkReader::new(
> -            reader.clone(),
> -            None,
> -            item.chunk_crypt_mode(),
> -            HashMap::new(),
> -        );
> -
> -        pull_single_archive(
> -            worker,
> -            &reader,
> -            &mut chunk_reader,
> -            snapshot,
> -            item,
> -            downloaded_chunks.clone(),
> -        )
> -        .await?;
> +        match source {
> +            BackupSource::Remote(ref reader) => {
> +                let chunk_reader = RemoteChunkReader::new(
> +                    reader.clone(),
> +                    None,
> +                    item.chunk_crypt_mode(),
> +                    HashMap::new(),
> +                );
> +                pull_single_archive(
> +                    worker,
> +                    &source,
> +                    chunk_reader,
> +                    snapshot,
> +                    item,
> +                    downloaded_chunks.clone(),
> +                )
> +                .await?
> +            }
> +            BackupSource::Local(ref dir) => {
> +                let chunk_reader =
> +                    LocalChunkReader::new(dir.datastore().clone(), None, item.chunk_crypt_mode());
> +                pull_single_archive(
> +                    worker,
> +                    &source,
> +                    chunk_reader,
> +                    snapshot,
> +                    item,
> +                    downloaded_chunks.clone(),
> +                )
> +                .await?
> +            }
> +        }
>      }
>
>      if let Err(err) = std::fs::rename(&tmp_manifest_name, &manifest_name) {
>          bail!("Atomic rename file {:?} failed - {}", manifest_name, err);
>      }

this could become a call to update_manifest that replaces the old one with the
new one instead, if the manifest is only kept in memory..

>
> -    if !client_log_name.exists() {
> -        try_client_log_download(worker, reader, &client_log_name).await?;
> +    if let BackupSource::Remote(reader) = source {
> +        if !client_log_name.exists() {
> +            try_client_log_download(worker, reader, &client_log_name).await?;
> +        };

this is relevant for local operations too, not only remote ones - for a local
source the log blob could simply be copied over:
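e.g. (a sketch, the helper name is made up):

use std::path::Path;

use anyhow::Error;
use pbs_datastore::manifest::CLIENT_LOG_BLOB_NAME;
use pbs_datastore::BackupDir;

// sketch: local counterpart to try_client_log_download - copy the log
// blob over from the source snapshot dir if the target lacks it
fn copy_client_log(source_dir: &BackupDir, client_log_name: &Path) -> Result<(), Error> {
    if !client_log_name.exists() {
        let mut source_log = source_dir.full_path();
        source_log.push(CLIENT_LOG_BLOB_NAME);
        if source_log.exists() {
            std::fs::copy(&source_log, client_log_name)?;
        }
    }
    Ok(())
}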
>      }
> -
>      snapshot
>          .cleanup_unreferenced_files(&manifest)
>          .map_err(|err| format_err!("failed to cleanup unreferenced files - {err}"))?;
> @@ -506,7 +580,7 @@ async fn pull_snapshot(
>  /// pointing to the local datastore and target namespace.
>  async fn pull_snapshot_from(
>      worker: &WorkerTask,
> -    reader: Arc<BackupReader>,
> +    source: BackupSource,
>      snapshot: &pbs_datastore::BackupDir,
>      downloaded_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
>  ) -> Result<(), Error> {
> @@ -517,7 +591,7 @@ async fn pull_snapshot_from(
>      if is_new {
>          task_log!(worker, "sync snapshot {}", snapshot.dir());
>
> -        if let Err(err) = pull_snapshot(worker, reader, snapshot, downloaded_chunks).await {
> +        if let Err(err) = pull_snapshot(worker, source, snapshot, downloaded_chunks).await {
>              if let Err(cleanup_err) = snapshot.datastore().remove_backup_dir(
>                  snapshot.backup_ns(),
>                  snapshot.as_ref(),
> @@ -530,7 +604,7 @@ async fn pull_snapshot_from(
>          task_log!(worker, "sync snapshot {} done", snapshot.dir());
>      } else {
>          task_log!(worker, "re-sync snapshot {}", snapshot.dir());
> -        pull_snapshot(worker, reader, snapshot, downloaded_chunks).await?;
> +        pull_snapshot(worker, source, snapshot, downloaded_chunks).await?;
>          task_log!(worker, "re-sync snapshot {} done", snapshot.dir());
>      }
>
> @@ -600,36 +674,52 @@ impl std::fmt::Display for SkipInfo {
>  /// - local group owner is already checked by pull_store
>  async fn pull_group(
>      worker: &WorkerTask,
> -    client: &HttpClient,
> +    client: Option<&HttpClient>,

the client here is redundant anyway, since for the remote case we can always
get it from params..

>      params: &PullParameters,
>      group: &pbs_api_types::BackupGroup,
>      remote_ns: BackupNamespace,
>      progress: &mut StoreProgress,
>  ) -> Result<(), Error> {
> -    let path = format!(
> -        "api2/json/admin/datastore/{}/snapshots",
> -        params.source.store()
> -    );
> -
> -    let mut args = json!({
> -        "backup-type": group.ty,
> -        "backup-id": group.id,
> -    });
> -
> -    if !remote_ns.is_root() {
> -        args["ns"] = serde_json::to_value(&remote_ns)?;
> -    }
> -
>      let target_ns = remote_ns.map_prefix(&params.remote_ns, &params.ns)?;
> +    let mut list: Vec<SnapshotListItem> = if let Some(client) = client {
> +        let path = format!(
> +            "api2/json/admin/datastore/{}/snapshots",
> +            params.source.store()
> +        );
>
> -    let mut result = client.get(&path, Some(args)).await?;
> -    let mut list: Vec<SnapshotListItem> = serde_json::from_value(result["data"].take())?;
> +        let mut args = json!({
> +            "backup-type": group.ty,
> +            "backup-id": group.id,
> +        });
>
> -    list.sort_unstable_by(|a, b| a.backup.time.cmp(&b.backup.time));
> +        if !remote_ns.is_root() {
> +            args["ns"] = serde_json::to_value(&remote_ns)?;
> +        }
>
> -    client.login().await?; // make sure auth is complete
> +        let mut result = client.get(&path, Some(args)).await?;
> +        serde_json::from_value(result["data"].take())?
> +    } else {
> +        let mut rpcenv = proxmox_router::cli::CliEnvironment::new();
> +        proxmox_router::RpcEnvironment::set_auth_id(&mut rpcenv, Some(String::from("root@pam")));

not needed (and would be wrong, since this would elevate logical privileges to
root!)

> +        let source_ns = if remote_ns.is_root() {
> +            None
> +        } else {
> +            Some(remote_ns.clone())
> +        };
> +        crate::api2::admin::datastore::do_list_snapshots(
> +            params.source.store().to_string(),
> +            source_ns,
> +            Some(group.ty),
> +            Some(group.id.clone()),
> +            &mut rpcenv,
> +        )
> +        .await?

this could simply list the local snapshots via the local datastore's
store.backup_group().list_backups(), with a bit of refactoring:

- we need the BackupDir and the in-progress status for both remote and local
- a BackupDir is returned by both the local and the remote list
- skipping can be done based on the size being None (remote) or the manifest
  being missing (local)

for example, this could be a Vec<(BackupDir, bool)>, or skipping could be done
up front and this could just become a Vec<BackupDir>
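for example (a sketch - backup_group() and list_backups() exist in
pbs_datastore, the helper itself and the up-front skipping are illustrative):

use std::sync::Arc;

use anyhow::Error;
use pbs_api_types::BackupNamespace;
use pbs_datastore::manifest::MANIFEST_BLOB_NAME;
use pbs_datastore::{BackupDir, DataStore};

// sketch: list the snapshots of one group of a local source directly
// from the datastore. snapshots without a manifest are still in
// progress and get skipped up front, mirroring the size == None check
// for the remote list.
fn list_local_snapshots(
    store: Arc<DataStore>,
    ns: BackupNamespace,
    group: pbs_api_types::BackupGroup,
) -> Result<Vec<BackupDir>, Error> {
    let mut list = Vec::new();
    for info in store.backup_group(ns, group).list_backups()? {
        if info.files.iter().any(|f| f.as_str() == MANIFEST_BLOB_NAME) {
            list.push(info.backup_dir);
        }
    }
    Ok(list)
}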
> +    };
> +    list.sort_unstable_by(|a, b| a.backup.time.cmp(&b.backup.time));
>
> -    let fingerprint = client.fingerprint();
> +    if let Some(client) = client {
> +        client.login().await?; // make sure auth is complete
> +    }
>
>      let last_sync = params.store.last_successful_backup(&target_ns, group)?;
>
> @@ -646,6 +736,13 @@ async fn pull_group(
>          count: 0,
>      };
>
> +    let datastore: Option<Arc<DataStore>> = match client {
> +        None => Some(DataStore::lookup_datastore(
> +            params.source.store(),
> +            Some(Operation::Read),
> +        )?),
> +        _ => None,
> +    };
>      for (pos, item) in list.into_iter().enumerate() {
>          let snapshot = item.backup;
>
> @@ -668,33 +765,47 @@ async fn pull_group(
>              }
>          }
>
> -        // get updated auth_info (new tickets)
> -        let auth_info = client.login().await?;
> -
> -        let options =
> -            HttpClientOptions::new_non_interactive(auth_info.ticket.clone(), fingerprint.clone())
> -                .rate_limit(params.limit.clone());
> +        let backup_source = if let Some(client) = client {

with client possibly dropped, this should rather match on the refactored param
field that tells us whether we are pulling from a local or remote source..

> +            // get updated auth_info (new tickets)
> +            let auth_info = client.login().await?;
> +            let fingerprint = client.fingerprint();
>
> -        let new_client = HttpClient::new(
> -            params.source.host(),
> -            params.source.port(),
> -            params.source.auth_id(),
> -            options,
> -        )?;
> -
> -        let reader = BackupReader::start(
> -            new_client,
> -            None,
> -            params.source.store(),
> -            &remote_ns,
> -            &snapshot,
> -            true,
> -        )
> -        .await?;
> +            let options = HttpClientOptions::new_non_interactive(
> +                auth_info.ticket.clone(),
> +                fingerprint.clone(),
> +            )
> +            .rate_limit(params.limit.clone());
> +
> +            let new_client = HttpClient::new(
> +                params.source.host(),
> +                params.source.port(),
> +                params.source.auth_id(),
> +                options,
> +            )?;
> +
> +            BackupSource::Remote(
> +                BackupReader::start(
> +                    new_client,
> +                    None,
> +                    params.source.store(),
> +                    &remote_ns,
> +                    &snapshot,
> +                    true,
> +                )
> +                .await?,
> +            )
> +        } else {
> +            if let Some(datastore) = datastore.clone() {

this would then be the second match arm

> +                BackupSource::Local(datastore.backup_dir(remote_ns.clone(), snapshot.clone())?)
> +            } else {

and this would no longer exist, since the match can be exhaustive ;)

> +                unreachable!("if there is no client and no datastore, then the ds lookup would have failed earlier")
> +            }
> +        };

otherwise, this } else { if ... else ... } can be collapsed (there are a few
more clippy lints triggered by this series that might be worth cleaning up as
well).

>
>          let snapshot = params.store.backup_dir(target_ns.clone(), snapshot)?;
>
> -        let result = pull_snapshot_from(worker, reader, &snapshot, downloaded_chunks.clone()).await;
> +        let result =
> +            pull_snapshot_from(worker, backup_source, &snapshot, downloaded_chunks.clone()).await;
>
>          progress.done_snapshots = pos as u64 + 1;
>          task_log!(worker, "percentage done: {}", progress);
> @@ -735,49 +846,64 @@ async fn pull_group(
>  // will modify params if switching to backwards mode for lack of NS support on remote end
>  async fn query_namespaces(
>      worker: &WorkerTask,
> -    client: &HttpClient,
> +    client: Option<&HttpClient>,

the client here is redundant anyway, since for the remote case we can always
get it from params..
>      params: &mut PullParameters,
>  ) -> Result<Vec<BackupNamespace>, Error> {
> -    let path = format!(
> -        "api2/json/admin/datastore/{}/namespace",
> -        params.source.store()
> -    );
> -    let mut data = json!({});
> -    if let Some(max_depth) = params.max_depth {
> -        data["max-depth"] = json!(max_depth);
> -    }
> +    let mut list: Vec<NamespaceListItem> = if let Some(client) = client {

same here again - this should match on params.source being Local or Remote

> +        let path = format!(
> +            "api2/json/admin/datastore/{}/namespace",
> +            params.source.store()
> +        );
> +        let mut data = json!({});
> +        if let Some(max_depth) = params.max_depth {
> +            data["max-depth"] = json!(max_depth);
> +        }
>
> -    if !params.remote_ns.is_root() {
> -        data["parent"] = json!(params.remote_ns);
> -    }
> +        if !params.remote_ns.is_root() {
> +            data["parent"] = json!(params.remote_ns);
> +        }
>
> -    let mut result = match client.get(&path, Some(data)).await {
> -        Ok(res) => res,
> -        Err(err) => match err.downcast_ref::<HttpError>() {
> -            Some(HttpError { code, message }) => match *code {
> -                StatusCode::NOT_FOUND => {
> -                    if params.remote_ns.is_root() && params.max_depth.is_none() {
> -                        task_log!(worker, "Could not query remote for namespaces (404) -> temporarily switching to backwards-compat mode");
> -                        task_log!(worker, "Either make backwards-compat mode explicit (max-depth == 0) or upgrade remote system.");
> -                        params.max_depth = Some(0);
> -                    } else {
> -                        bail!("Remote namespace set/recursive sync requested, but remote does not support namespaces.")
> -                    }
>
> -                    return Ok(vec![params.remote_ns.clone()]);
> -                }
> -                _ => {
> -                    bail!("Querying namespaces failed - HTTP error {code} - {message}");
> +        let mut result = match client.get(&path, Some(data)).await {
> +            Ok(res) => res,
> +            Err(err) => match err.downcast_ref::<HttpError>() {
> +                Some(HttpError { code, message }) => match *code {
> +                    StatusCode::NOT_FOUND => {
> +                        if params.remote_ns.is_root() && params.max_depth.is_none() {
> +                            task_log!(worker, "Could not query remote for namespaces (404) -> temporarily switching to backwards-compat mode");
> +                            task_log!(worker, "Either make backwards-compat mode explicit (max-depth == 0) or upgrade remote system.");
> +                            params.max_depth = Some(0);
> +                        } else {
> +                            bail!("Remote namespace set/recursive sync requested, but remote does not support namespaces.")
> +                        }
>
> +                        return Ok(vec![params.remote_ns.clone()]);
> +                    }
> +                    _ => {
> +                        bail!("Querying namespaces failed - HTTP error {code} - {message}");
> +                    }
> +                },
> +                None => {
> +                    bail!("Querying namespaces failed - {err}");
>                  }
>              },
> -            None => {
> -                bail!("Querying namespaces failed - {err}");
> -            }
> -        },
> -    };
> -
> -    let mut list: Vec<NamespaceListItem> = serde_json::from_value(result["data"].take())?;
> +        };
>
> +        serde_json::from_value(result["data"].take())?
> +    } else {
> +        let mut rpcenv = proxmox_router::cli::CliEnvironment::new();
> +        proxmox_router::RpcEnvironment::set_auth_id(&mut rpcenv, Some(String::from("root@pam")));
> +        let parent_ns = if params.remote_ns.is_root() {
> +            None
> +        } else {
> +            Some(params.remote_ns.clone())
> +        };
> +        crate::api2::admin::namespace::list_namespaces(
> +            params.source.store().to_string(),
> +            parent_ns,
> +            params.max_depth,
> +            &mut rpcenv,
> +        )?

and here as well - instead of pretending to be root@pam and querying over the
API, this could just query directly via pbs_datastore.. we only need a
Vec<BackupNamespace> after all
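e.g. something like this (a sketch, assuming the ListNamespacesRecursive
helper from pbs_datastore::hierarchy):

use std::sync::Arc;

use anyhow::Error;
use pbs_api_types::BackupNamespace;
use pbs_datastore::hierarchy::ListNamespacesRecursive;
use pbs_datastore::DataStore;

// sketch: resolve the namespaces of a local source directly from the
// datastore - no API call, no pretend-root RpcEnvironment
fn query_local_namespaces(
    store: Arc<DataStore>,
    ns: BackupNamespace,
    max_depth: Option<usize>,
) -> Result<Vec<BackupNamespace>, Error> {
    let iter = match max_depth {
        Some(depth) => ListNamespacesRecursive::new_max_depth(store, ns, depth)?,
        None => ListNamespacesRecursive::new(store, ns)?,
    };
    iter.collect()
}

the backwards-compat fallback above is then only relevant for the remote
branch anyway.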
> +    };
>      // parents first
>      list.sort_unstable_by(|a, b| a.ns.name_len().cmp(&b.ns.name_len()));
>
> @@ -897,7 +1023,7 @@ fn check_and_remove_vanished_ns(
>  /// - access to sub-NS checked here
>  pub(crate) async fn pull_store(
>      worker: &WorkerTask,
> -    client: &HttpClient,
> +    client: Option<&HttpClient>,

the client here is redundant anyway, since for the remote case we can always
get it from params..

>      mut params: PullParameters,
>  ) -> Result<(), Error> {
>      // explicit create shared lock to prevent GC on newly created chunks
> @@ -1000,27 +1126,42 @@ pub(crate) async fn pull_store(
>  /// - owner check for vanished groups done here
>  pub(crate) async fn pull_ns(
>      worker: &WorkerTask,
> -    client: &HttpClient,
> +    client: Option<&HttpClient>,
>      params: &PullParameters,
>      source_ns: BackupNamespace,
>      target_ns: BackupNamespace,
>  ) -> Result<(StoreProgress, bool), Error> {
> -    let path = format!("api2/json/admin/datastore/{}/groups", params.source.store());
> +    let mut list: Vec<GroupListItem> = if let Some(client) = client {

and here we have the same pattern again - this should match on params.source,
we are actually only interested in a Vec<BackupGroup> down below

> +        let path = format!("api2/json/admin/datastore/{}/groups", params.source.store());
>
> -    let args = if !source_ns.is_root() {
> -        Some(json!({
> -            "ns": source_ns,
> -        }))
> -    } else {
> -        None
> -    };
> +        let args = if !source_ns.is_root() {
> +            Some(json!({
> +                "ns": source_ns,
> +            }))
> +        } else {
> +            None
> +        };
>
> -    let mut result = client
> -        .get(&path, args)
> -        .await
> -        .map_err(|err| format_err!("Failed to retrieve backup groups from remote - {}", err))?;
> +        let mut result = client
> +            .get(&path, args)
> +            .await
> +            .map_err(|err| format_err!("Failed to retrieve backup groups from remote - {}", err))?;
>
> -    let mut list: Vec<GroupListItem> = serde_json::from_value(result["data"].take())?;
> +        serde_json::from_value(result["data"].take())?
> +    } else {
> +        let mut rpcenv = proxmox_router::cli::CliEnvironment::new();
> +        proxmox_router::RpcEnvironment::set_auth_id(&mut rpcenv, Some(String::from("root@pam")));
> +        let source_ns = if source_ns.is_root() {
> +            None
> +        } else {
> +            Some(source_ns.clone())
> +        };
> +        crate::api2::admin::datastore::list_groups(
> +            params.source.store().to_string(),
> +            source_ns,
> +            &mut rpcenv,
> +        )?

so this doesn't need to query over the API as pretend-root either, but can
just do a local query of the store+NS (see the sketch at the end of this
mail).

> +    };
>
>      let total_count = list.len();
>      list.sort_unstable_by(|a, b| {
> --
> 2.30.2
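as promised above, a sketch of the direct group listing (assuming the
ListGroups helper from pbs_datastore::hierarchy - owner filtering as per the
remark at the top of this mail still needs to happen on top of it):

use std::sync::Arc;

use anyhow::Error;
use pbs_api_types::BackupNamespace;
use pbs_datastore::hierarchy::ListGroups;
use pbs_datastore::DataStore;

// sketch: enumerate the backup groups of a local source namespace
// directly, yielding the plain pbs_api_types::BackupGroup values the
// rest of pull_ns actually works with
fn list_local_groups(
    store: Arc<DataStore>,
    ns: BackupNamespace,
) -> Result<Vec<pbs_api_types::BackupGroup>, Error> {
    ListGroups::new(store, ns)?
        .map(|group| group.map(|group| group.group().clone()))
        .collect()
}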