Message-ID: <0c0ec7a8-993c-46f7-9daa-6f02335dd26e@proxmox.com>
Date: Fri, 4 Jul 2025 13:30:26 +0200
From: Christian Ebner
To: Proxmox Backup Server development discussion, Hannes Laimer
References: <20250604123054.87007-1-h.laimer@proxmox.com>
 <20250604123054.87007-4-h.laimer@proxmox.com>
In-Reply-To: <20250604123054.87007-4-h.laimer@proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v3 3/7] api: admin: run
 configured sync jobs when a datastore is mounted

a few comments inline.

On 6/4/25 14:30, Hannes Laimer wrote:
> When a datastore is mounted, spawn a new task to run all sync jobs
> marked with `run-on-mount`.
> These jobs run sequentially and include any job for which the mounted
> datastore is:
>
> - The source or target in a local push/pull job
> - The source in a push job to a remote datastore
> - The target in a pull job from a remote datastore
>
> Signed-off-by: Hannes Laimer
> ---
>  src/api2/admin/datastore.rs | 106 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 100 insertions(+), 6 deletions(-)
>
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index 39249448..68bb2a1f 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -42,8 +42,8 @@ use pbs_api_types::{
>      DataStoreConfig, DataStoreListItem, DataStoreMountStatus, DataStoreStatus,
>      GarbageCollectionJobStatus, GroupListItem, JobScheduleStatus, KeepOptions, MaintenanceMode,
>      MaintenanceType, Operation, PruneJobOptions, SnapshotListItem, SnapshotVerifyState,
> -    BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA,
> -    BACKUP_TYPE_SCHEMA, CATALOG_NAME, CLIENT_LOG_BLOB_NAME, DATASTORE_SCHEMA,
> +    SyncJobConfig, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA,
> +    BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, CATALOG_NAME, CLIENT_LOG_BLOB_NAME, DATASTORE_SCHEMA,
>      IGNORE_VERIFIED_BACKUPS_SCHEMA, MANIFEST_BLOB_NAME, MAX_NAMESPACE_DEPTH, NS_MAX_DEPTH_SCHEMA,
>      PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
>      PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, PRIV_SYS_MODIFY, UPID, UPID_SCHEMA,
> @@ -66,7 +66,7 @@ use pbs_datastore::{
>      DataStore, LocalChunkReader, StoreProgress,
>  };
>  use pbs_tools::json::required_string_param;
> -use proxmox_rest_server::{formatter, WorkerTask};
> +use proxmox_rest_server::{formatter, worker_is_active, WorkerTask};
>
>  use crate::api2::backup::optional_ns_param;
>  use crate::api2::node::rrd::create_value_from_rrd;
> @@ -2510,6 +2510,63 @@ pub fn do_mount_device(datastore: DataStoreConfig) -> Result<(), Error> {
>      Ok(())
>  }
>
> +async fn do_sync_jobs(
> +    jobs_to_run: Vec<SyncJobConfig>,
> +    worker: Arc<WorkerTask>,
> +) -> Result<(), Error> {
> +    let count = jobs_to_run.len();
> +    info!(
> +        "will run {} sync jobs: {}",
> +        count,

nit: `count` can be inlined

> +        jobs_to_run
> +            .iter()
> +            .map(|job| job.id.as_str())
> +            .collect::<Vec<&str>>()
> +            .join(", ")
> +    );
> +
> +    for (i, job_config) in jobs_to_run.into_iter().enumerate() {
> +        if worker.abort_requested() {
> +            bail!("aborted due to user request");
> +        }
> +        let job_id = &job_config.id;
> +        let client = crate::client_helpers::connect_to_localhost()?;

nit: adding some error context here would be good, so it is clear that an
eventual connection error stems from the client.

nit: further, the whole client instantiation can be moved outside of the sync
job loop, so it does not need to be reconstructed on each iteration. A rough
sketch of both points below.
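Untested sketch of what I mean, assuming `connect_to_localhost()` keeps its
current signature and using anyhow's `Context` for the error context:

    use anyhow::Context;

    // construct the client once, outside the loop, and attach context so a
    // connection failure is clearly attributable to the client setup
    let client = crate::client_helpers::connect_to_localhost()
        .context("failed to connect to localhost")?;

    for (i, job_config) in jobs_to_run.into_iter().enumerate() {
        if worker.abort_requested() {
            bail!("aborted due to user request");
        }
        let job_id = &job_config.id;
        // ... rest of the loop body unchanged, reusing `client` ...
    }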
> +        let Ok(result) = client
> +            .post(format!("api2/json/admin/sync/{job_id}/run").as_str(), None)
> +            .await
> +        else {
> +            warn!("unable to start sync job {job_id}");
> +            continue;
> +        };
> +        info!("[{}/{count}] starting '{job_id}'...", i + 1);
> +        let Some(upid_str) = &result["data"].as_str() else {

nit: there is no need for the additional reference here, as_str() already
takes care of that

> +            warn!(
> +                "could not recieve UPID of started job (may be runnig, just can't track it here)"

nit: 2 typos: should be `receive` and `running`

> +            );
> +            continue;
> +        };
> +        let upid: UPID = upid_str.parse()?;
> +
> +        let sleep_duration = core::time::Duration::new(1, 0);

nit: this could use `Duration::from_secs(1)` instead, which inherently
documents the argument's time unit

> +        let mut status_retries = 1;
> +        loop {
> +            if worker.abort_requested() {
> +                bail!("aborted due to user request, already started job will finish");
> +            }
> +            match worker_is_active(&upid).await {
> +                Ok(true) => tokio::time::sleep(sleep_duration).await,
> +                Ok(false) => break,
> +                Err(_) if status_retries > 3 => break,
> +                Err(err) => {
> +                    warn!("could not get job status: {err} ({}/3)", status_retries);

nit: the maximum retry count could be defined as a constant and the same
constant also used for the warning output, to guarantee consistency. Further,
`status_retries` can be inlined into the format string. See the sketch
further below.

> +                    status_retries += 1;
> +                }
> +            }
> +        }
> +    }
> +    Ok(())
> +}
> +
>  #[api(
>      protected: true,
>      input: {
> @@ -2541,12 +2598,49 @@ pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
>      let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
>      let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
>
> -    let upid = WorkerTask::new_thread(
> +    let upid = WorkerTask::spawn(

question: already mentioned last time, but I am still not sure I understand
why spawn() instead of new_thread() is okay here. After all, this would
execute the whole mount code on the current thread, and potentially block
that? So shouldn't do_mount_device() be non-blocking for this change to be
okay? (see also the sketch at the end of this mail)

>          "mount-device",
> -        Some(store),
> +        Some(store.clone()),
>          auth_id.to_string(),
>          to_stdout,
> -        move |_worker| do_mount_device(datastore),
> +        move |_worker| async move {
> +            do_mount_device(datastore.clone())?;
> +            let Ok((sync_config, _digest)) = pbs_config::sync::config() else {
> +                warn!("unable to read sync job config, won't run any sync jobs");
> +                return Ok(());
> +            };
> +            let Ok(list) = sync_config.convert_to_typed_array("sync") else {
> +                warn!("unable to parse sync job config, won't run any sync jobs");
> +                return Ok(());
> +            };
> +            let jobs_to_run: Vec<SyncJobConfig> = list
> +                .into_iter()
> +                .filter(|job: &SyncJobConfig| {
> +                    // add job iff (running on mount is enabled and) any of these apply
> +                    //   - the jobs is local and we are source or target
> +                    //   - we are the source of a push to a remote
> +                    //   - we are the target of a pull from a remote
> +                    //
> +                    // `job.store == datastore.name` iff we are the target for pull from
> +                    // remote or we are the source for push to remote, therefore we don't
> +                    // have to check for the direction of the job.
> +                    job.run_on_mount.unwrap_or(false)
> +                        && (job.remote.is_none() && job.remote_store == datastore.name
> +                            || job.store == datastore.name)
> +                })
> +                .collect();

note: it would make sense to sort the list of sync jobs by their job id here,
so that the execution order can be inferred. This should be documented in the
user documentation as well.
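e.g. something like this (sketch only, using the field names from the patch):

    let mut jobs_to_run: Vec<SyncJobConfig> = list
        .into_iter()
        .filter(|job: &SyncJobConfig| {
            // ... same filter as above ...
            job.run_on_mount.unwrap_or(false)
                && (job.remote.is_none() && job.remote_store == datastore.name
                    || job.store == datastore.name)
        })
        .collect();
    // deterministic execution order: run the jobs sorted by their job id
    jobs_to_run.sort_by(|a, b| a.id.cmp(&b.id));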
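And coming back to the retry nit above, a rough sketch of the polling loop
(the constant name is just a suggestion):

    // single source of truth for the retry limit, used by guard and log line
    const MAX_STATUS_RETRIES: usize = 3;

    let mut status_retries = 1;
    loop {
        if worker.abort_requested() {
            bail!("aborted due to user request, already started job will finish");
        }
        match worker_is_active(&upid).await {
            // job still active: poll again in one second
            Ok(true) => tokio::time::sleep(std::time::Duration::from_secs(1)).await,
            Ok(false) => break,
            // stop tracking once the retries are exhausted
            Err(_) if status_retries > MAX_STATUS_RETRIES => break,
            Err(err) => {
                warn!("could not get job status: {err} ({status_retries}/{MAX_STATUS_RETRIES})");
                status_retries += 1;
            }
        }
    }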
> +            if !jobs_to_run.is_empty() {
> +                info!("starting {} sync jobs", jobs_to_run.len());
> +                let _ = WorkerTask::spawn(
> +                    "mount-sync-jobs",
> +                    Some(store),
> +                    auth_id.to_string(),
> +                    false,
> +                    move |worker| async move { do_sync_jobs(jobs_to_run, worker).await },
> +                );
> +            }
> +            Ok(())
> +        },
>      )?;
>
>      Ok(json!(upid))
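Regarding the spawn() vs. new_thread() question above: if do_mount_device()
does block, one option might be to off-load it onto tokio's blocking thread
pool, along these lines (untested, just to illustrate the concern;
`mount_config` is an arbitrary name):

    move |_worker| async move {
        let mount_config = datastore.clone();
        // run the blocking mount on the blocking pool so the async executor
        // thread is not stalled while the device is being mounted
        tokio::task::spawn_blocking(move || do_mount_device(mount_config)).await??;
        // ... continue with reading the sync job config as before ...
        Ok(())
    },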