From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id CFDB4B83A9 for ; Thu, 7 Mar 2024 14:54:33 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id B7B22373CE for ; Thu, 7 Mar 2024 14:54:33 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 7 Mar 2024 14:54:32 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 7C57748889 for ; Thu, 7 Mar 2024 14:54:32 +0100 (CET) Message-ID: <01a44ab2-0759-440a-bcf0-0e2761b7edec@proxmox.com> Date: Thu, 7 Mar 2024 14:54:30 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Content-Language: en-US To: Proxmox Backup Server development discussion , Gabriel Goller References: <20240125114857.128507-1-g.goller@proxmox.com> From: Max Carrara In-Reply-To: <20240125114857.128507-1-g.goller@proxmox.com> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.002 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. 
See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [datastore.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup v2] api: make prune-group a real workertask X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Mar 2024 13:54:33 -0000 On 1/25/24 12:48, Gabriel Goller wrote: > `prune-group` is currently not a real workertask, ie it behaves like one > but doesn't start a thread nor a task to do its work. > > Changed it to start a tokio-task, so that we can delete snapshots > asynchronously. The `dry-run` feature still behaves in the same way and > returns early. > > This paves the way for the new logging infra (which uses `task_local` to > define a logger) and improves performance of bigger backup-groups. > > Signed-off-by: Gabriel Goller Got some comments inline, mostly regarding code style. Final remarks at the end. 
> --- > > Changes since v1: > - use feature flag to activate, so we don't break the api > - convert the result to a structure and print it in the tasklog > - enable the feature flag in the frontend > > src/api2/admin/datastore.rs | 146 ++++++++++++++++++++++-------------- > www/datastore/Prune.js | 1 + > 2 files changed, 92 insertions(+), 55 deletions(-) > > diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs > index a95031e7..f2a94448 100644 > --- a/src/api2/admin/datastore.rs > +++ b/src/api2/admin/datastore.rs > @@ -944,6 +944,12 @@ pub fn verify( > type: BackupNamespace, > optional: true, > }, > + "use-task": { > + type: bool, > + default: false, > + optional: true, > + description: "Spins up an asynchronous task that does the work.", > + }, > }, > }, > returns: pbs_api_types::ADMIN_DATASTORE_PRUNE_RETURN_TYPE, > @@ -960,6 +966,7 @@ pub fn prune( > keep_options: KeepOptions, > store: String, > ns: Option, > + use_task: bool, Mmh, the number of arguments of this function makes clippy complain - would it make sense to use `_param` below and decode the parameter instead? That being said, perhaps we should find a more general solution for handling that many arguments at once (the arg-struct thing), as I've noticed that this is a recurring pattern at this point. That's not too relevant for this series otherwise though - if extracting the `bool` from `Value` (below) is too awkward, I guess it's fine to just disable the lint here (for now). 
> _param: Value, > rpcenv: &mut dyn RpcEnvironment, > ) -> Result { > @@ -978,7 +985,20 @@ pub fn prune( > let worker_id = format!("{}:{}:{}", store, ns, group); > let group = datastore.backup_group(ns.clone(), group); > > - let mut prune_result = Vec::new(); > + #[derive(Debug, serde::Serialize)] > + struct PruneResult { > + #[serde(rename = "backup-type")] > + backup_type: BackupType, > + #[serde(rename = "backup-id")] > + backup_id: String, > + #[serde(rename = "backup-time")] > + backup_time: i64, > + keep: bool, > + protected: bool, > + #[serde(skip_serializing_if = "Option::is_none")] > + ns: Option, > + } > + let mut prune_result: Vec = Vec::new(); > > let list = group.list_backups()?; > > @@ -992,77 +1012,93 @@ pub fn prune( > for (info, mark) in prune_info { > let keep = keep_all || mark.keep(); > > - let mut result = json!({ > - "backup-type": info.backup_dir.backup_type(), > - "backup-id": info.backup_dir.backup_id(), > - "backup-time": info.backup_dir.backup_time(), > - "keep": keep, > - "protected": mark.protected(), > - }); > + let mut result = PruneResult { > + backup_type: info.backup_dir.backup_type(), > + backup_id: info.backup_dir.backup_id().to_owned(), > + backup_time: info.backup_dir.backup_time(), > + keep, > + protected: mark.protected(), > + ns: None, > + }; > let prune_ns = info.backup_dir.backup_ns(); > if !prune_ns.is_root() { > - result["ns"] = serde_json::to_value(prune_ns)?; > + result.ns = Some(prune_ns.to_owned()); > } > prune_result.push(result); > } > return Ok(json!(prune_result)); > } > - > - // We use a WorkerTask just to have a task log, but run synchrounously > - let worker = WorkerTask::new("prune", Some(worker_id), auth_id.to_string(), true)?; > - > - if keep_all { > - task_log!(worker, "No prune selection - keeping all files."); > - } else { > - let mut opts = Vec::new(); > - if !ns.is_root() { > - opts.push(format!("--ns {ns}")); > + let prune_group = move |worker: Arc| { > + if keep_all { > + task_log!(worker, "No prune 
selection - keeping all files."); > + } else { > + let mut opts = Vec::new(); > + if !ns.is_root() { > + opts.push(format!("--ns {ns}")); > + } > + crate::server::cli_keep_options(&mut opts, &keep_options); > + > + task_log!(worker, "retention options: {}", opts.join(" ")); > + task_log!( > + worker, > + "Starting prune on {} group \"{}\"", > + print_store_and_ns(&store, &ns), > + group.group(), > + ); > } > - crate::server::cli_keep_options(&mut opts, &keep_options); > - > - task_log!(worker, "retention options: {}", opts.join(" ")); > - task_log!( > - worker, > - "Starting prune on {} group \"{}\"", > - print_store_and_ns(&store, &ns), > - group.group(), > - ); > - } > > - for (info, mark) in prune_info { > - let keep = keep_all || mark.keep(); > + for (info, mark) in prune_info { > + let keep = keep_all || mark.keep(); > > - let backup_time = info.backup_dir.backup_time(); > - let timestamp = info.backup_dir.backup_time_string(); > - let group: &pbs_api_types::BackupGroup = info.backup_dir.as_ref(); > + let backup_time = info.backup_dir.backup_time(); > + let timestamp = info.backup_dir.backup_time_string(); > + let group: &pbs_api_types::BackupGroup = info.backup_dir.as_ref(); > > - let msg = format!("{}/{}/{} {}", group.ty, group.id, timestamp, mark,); > + let msg = format!("{}/{}/{} {}", group.ty, group.id, timestamp, mark); > > - task_log!(worker, "{}", msg); > + task_log!(worker, "{}", msg); While you're touching that code, would be nice to inline variables to be formatted into the format strings: task_log!(worker, "{msg}"); Goes for some things above and below too, but could IMO also be a separate patch. 
> > - prune_result.push(json!({ > - "backup-type": group.ty, > - "backup-id": group.id, > - "backup-time": backup_time, > - "keep": keep, > - "protected": mark.protected(), > - })); > + prune_result.push(PruneResult { > + backup_type: group.ty, > + backup_id: group.id.clone(), > + backup_time, > + keep, > + protected: mark.protected(), > + ns: None, > + }); > > - if !(dry_run || keep) { > - if let Err(err) = info.backup_dir.destroy(false) { > - task_warn!( > - worker, > - "failed to remove dir {:?}: {}", > - info.backup_dir.relative_path(), > - err, > - ); > + if !(dry_run || keep) { Early exit on `dry_run` is above, so this condition should just be `!keep`. > + if let Err(err) = info.backup_dir.destroy(false) { > + task_warn!( > + worker, > + "failed to remove dir {:?}: {}", > + info.backup_dir.relative_path(), > + err, > + ); > + } Small style notice: Regarding formatting, it comes rather naturally if you bind fields that you use multiple times to variables. For example: The code already accesses fields of `info.backup_dir` in multiple locations above, so it would make sense to borrow it for the time being: // further above let backup_dir = &info.backup_dir; // ... if let Err(err) = backup_dir.destroy(false) { let path = backup_dir.relative_path(); task_warn!( worker, "failed to remove dir {path:?}: {err}", ); } That way the code becomes more readable even though it doesn't do anything different. > } > } > + prune_result > + }; Small thing: Some spacing here would be nice (or in general between scopes). > + if use_task { > + let upid = WorkerTask::spawn( > + "prune", > + Some(worker_id), > + auth_id.to_string(), > + true, > + move |worker| async move { > + let result = prune_group(worker.clone()); > + task_log!(worker, "{:#?}", result); `result` is easily inlined into the format string here as well. 
> + Ok(()) > + }, > + )?; > + Ok(json!(upid)) > + } else { > + let worker = WorkerTask::new("prune", Some(worker_id), auth_id.to_string(), true)?; > + let result = prune_group(worker.clone()); > + worker.log_result(&Ok(())); > + Ok(json!(result)) > } > - > - worker.log_result(&Ok(())); > - > - Ok(json!(prune_result)) > } > > #[api( > diff --git a/www/datastore/Prune.js b/www/datastore/Prune.js > index 81f6927b..5752907e 100644 > --- a/www/datastore/Prune.js > +++ b/www/datastore/Prune.js > @@ -52,6 +52,7 @@ Ext.define('PBS.Datastore.PruneInputPanel', { > if (me.ns && me.ns !== '') { > values.ns = me.ns; > } > + values["use-task"] = true; > return values; > }, > All in all the changes are straightforward IMO - can't really complain here! The style things could be changed in a follow-up commit to be honest, I personally don't find them to be a blocker or anything. Though, regarding the number of function params of `prune()`: I'm not sure what the others think, but I feel it's *fine* to apply it as is, unless you want to try out packing things into `_param: Value` and extracting them by hand later. Don't want to bikeshed too much about smaller changes to fn signatures like this, tbh. We should have some sort of mechanism to define arg-structs for our API methods (and in general), but that's off-topic. One last thing: Since the *complete* result is logged above (where the worker is spawned), maybe we should reconsider what we log here exactly? 
For the record, this is what the output looks like with the patch applied: Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: retention options: --keep-last 1 Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: Starting prune on datastore 'test-pool', root namespace group "vm/100" Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: vm/100/2023-11-28T13:07:05Z remove Mar 07 10:22:29 pbs-dev proxmox-backup-[5636]: pbs-dev proxmox-backup-proxy[5636]: removing backup snapshot "/mnt/datastore/test-pool/vm/100/2023-11-28T13:07:05Z" Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: vm/100/2023-12-04T08:57:22Z keep Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: [ Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: PruneResult { Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_type: Vm, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_id: "100", Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_time: 1701176825, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: keep: false, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: protected: false, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: ns: None, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: }, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: PruneResult { Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_type: Vm, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_id: "100", Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: backup_time: 1701680242, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: keep: true, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: protected: false, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: ns: None, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: }, Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: ] Mar 07 10:22:29 pbs-dev proxmox-backup-proxy[5636]: TASK OK Perhaps it's better to log whether `result` is `Err` or `Ok` instead? In any case: Reviewed-by: Max Carrara Tested-by: Max Carrara