From: Stefan Hanreich <s.hanreich@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 30 Nov 2022 16:00:57 +0100
Message-Id: <20221130150102.242374-3-s.hanreich@proxmox.com>
In-Reply-To: <20221130150102.242374-1-s.hanreich@proxmox.com>
References: <20221130150102.242374-1-s.hanreich@proxmox.com>
X-Mailer: git-send-email 2.30.2
Subject: [pbs-devel] [PATCH proxmox-backup v2 2/7] refactor prune.rs - add PruneJob struct for handling prune jobs

This encapsulates the previously free-standing prune functions into a struct
that is responsible for creating prune jobs. The struct can be used to create
a new, configurable PruneJob and takes over several configuration options that
call sites previously had to handle themselves. Moving logging and dry-run
handling into the struct removes the need to write that code manually at every
call site. Additionally, the log messages are now unified across all places
where pruning is used.

This commit only introduces the PruneJob struct; the call sites are only
slightly adjusted so that they now call PruneJob::compute_prune_info. They
will be refactored in future commits to use the newly exposed functionality
of PruneJob.
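To illustrate the intended use, here is a minimal sketch of how a call site
could drive the new builder-style API once it is adopted (not part of this
patch; the helper function and the concrete worker type handed to logging()
are assumptions for illustration):

    use std::sync::Arc;

    use anyhow::Error;
    use pbs_api_types::KeepOptions;
    use pbs_datastore::prune::PruneJob;
    use pbs_datastore::BackupInfo;
    use proxmox_sys::WorkerTaskContext;

    // Hypothetical call site: prune an already listed backup group with the new API.
    fn prune_listed_group(
        list: Vec<BackupInfo>,
        keep_options: &KeepOptions,
        worker: Arc<dyn WorkerTaskContext>,
        dry_run: bool,
    ) -> Result<(), Error> {
        PruneJob::new(list, keep_options)?
            .logging(worker)  // emit one task_log! line per snapshot mark
            .dry_run(dry_run) // if set, only log what would be removed
            .run();           // or run_with_callback(...) to inspect per-snapshot results

        Ok(())
    }

Call sites that need the per-snapshot outcome (e.g. to build the prune API
response) would presumably use run_with_callback() instead of run() and
receive the BackupInfo, its PruneMark and the removal result for each
snapshot.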
Signed-off-by: Stefan Hanreich <s.hanreich@proxmox.com>
---
 pbs-datastore/src/prune.rs  | 279 ++++++++++++++++++++++--------------
 src/api2/admin/datastore.rs |   4 +-
 src/server/prune_job.rs     |   4 +-
 tests/prune.rs              |   4 +-
 4 files changed, 179 insertions(+), 112 deletions(-)

diff --git a/pbs-datastore/src/prune.rs b/pbs-datastore/src/prune.rs
index 96da5826..b840fef6 100644
--- a/pbs-datastore/src/prune.rs
+++ b/pbs-datastore/src/prune.rs
@@ -1,7 +1,9 @@
 use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
+use std::sync::Arc;
 
 use anyhow::Error;
+use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
 
 use pbs_api_types::KeepOptions;
 
@@ -36,137 +38,202 @@ impl std::fmt::Display for PruneMark {
     }
 }
 
-fn mark_selections<F: Fn(&BackupInfo) -> Result<String, Error>>(
-    mark: &mut HashMap<PathBuf, PruneMark>,
-    list: &[BackupInfo],
-    keep: usize,
-    select_id: F,
-) -> Result<(), Error> {
-    let mut include_hash = HashSet::new();
-
-    let mut already_included = HashSet::new();
-    for info in list {
-        let backup_id = info.backup_dir.relative_path();
-        if let Some(PruneMark::Keep) = mark.get(&backup_id) {
-            let sel_id: String = select_id(info)?;
-            already_included.insert(sel_id);
-        }
+pub struct PruneJob {
+    marks: Vec<(BackupInfo, PruneMark)>,
+    dry_run: bool,
+    worker: Option<Arc<dyn WorkerTaskContext>>,
+}
+
+impl PruneJob {
+    pub fn new(list: Vec<BackupInfo>, options: &KeepOptions) -> Result<Self, Error> {
+        let mut prune_info = PruneJob::compute_prune_info(list, options)?;
+        prune_info.reverse();
+
+        Ok(Self {
+            marks: prune_info,
+            dry_run: false,
+            worker: None,
+        })
     }
 
-    for info in list {
-        let backup_id = info.backup_dir.relative_path();
-        if mark.get(&backup_id).is_some() {
-            continue;
-        }
-        if info.protected {
-            mark.insert(backup_id, PruneMark::Protected);
-            continue;
-        }
-        let sel_id: String = select_id(info)?;
+    pub fn logging(&mut self, worker: Arc<dyn WorkerTaskContext>) -> &mut Self {
+        self.worker = Some(worker);
+        self
+    }
 
-        if already_included.contains(&sel_id) {
-            continue;
-        }
+    pub fn dry_run(&mut self, enabled: bool) -> &mut Self {
+        self.dry_run = enabled;
+        self
+    }
+
+    pub fn run(&self) {
+        self.run_with_callback(|_, _, _| ())
+    }
 
-        if !include_hash.contains(&sel_id) {
-            if include_hash.len() >= keep {
-                break;
+    pub fn run_with_callback<E>(&self, callback: E)
+    where
+        E: Fn(&BackupInfo, &PruneMark, Result<(), Error>),
+    {
+        for (info, mark) in &self.marks {
+            if let Some(worker) = &self.worker {
+                task_log!(
+                    worker,
+                    "{}{} {}/{}/{}",
+                    if self.dry_run { "would " } else { "" },
+                    mark,
+                    info.backup_dir.backup_type(),
+                    info.backup_dir.backup_id(),
+                    info.backup_dir.backup_time_string()
+                );
             }
-            include_hash.insert(sel_id);
-            mark.insert(backup_id, PruneMark::Keep);
-        } else {
-            mark.insert(backup_id, PruneMark::Remove);
+
+            let mut result = Ok(());
+
+            if !self.dry_run && !mark.keep() {
+                result = info.backup_dir.destroy(false);
+
+                if let (Err(err), Some(worker)) = (&result, &self.worker) {
+                    let path = info.backup_dir.relative_path();
+                    task_warn!(worker, "failed to remove dir {path:?}: {err}")
+                }
+            }
+
+            callback(info, mark, result);
         }
     }
 
-    Ok(())
-}
+    fn mark_selections<E: Fn(&BackupInfo) -> Result<String, Error>>(
+        mark: &mut HashMap<PathBuf, PruneMark>,
+        list: &[BackupInfo],
+        keep: usize,
+        select_id: E,
+    ) -> Result<(), Error> {
+        let mut include_hash = HashSet::new();
 
-fn remove_incomplete_snapshots(mark: &mut HashMap<PathBuf, PruneMark>, list: &[BackupInfo]) {
-    let mut keep_unfinished = true;
-    for info in list.iter() {
-        // backup is considered unfinished if there is no manifest
-        if info.is_finished() {
-            // There is a new finished backup, so there is no need
-            // to keep older unfinished backups.
-            keep_unfinished = false;
-        } else {
+        let mut already_included = HashSet::new();
+        for info in list {
             let backup_id = info.backup_dir.relative_path();
-            if keep_unfinished {
-                // keep first unfinished
-                mark.insert(backup_id, PruneMark::KeepPartial);
+            if let Some(PruneMark::Keep) = mark.get(&backup_id) {
+                let sel_id: String = select_id(info)?;
+                already_included.insert(sel_id);
+            }
+        }
+
+        for info in list {
+            let backup_id = info.backup_dir.relative_path();
+            if mark.get(&backup_id).is_some() {
+                continue;
+            }
+            if info.protected {
+                mark.insert(backup_id, PruneMark::Protected);
+                continue;
+            }
+            let sel_id: String = select_id(info)?;
+
+            if already_included.contains(&sel_id) {
+                continue;
+            }
+
+            if !include_hash.contains(&sel_id) {
+                if include_hash.len() >= keep {
+                    break;
+                }
+                include_hash.insert(sel_id);
+                mark.insert(backup_id, PruneMark::Keep);
             } else {
                 mark.insert(backup_id, PruneMark::Remove);
             }
-            keep_unfinished = false;
         }
-    }
-}
 
-/// This filters incomplete and kept backups.
-pub fn compute_prune_info(
-    mut list: Vec<BackupInfo>,
-    options: &KeepOptions,
-) -> Result<Vec<(BackupInfo, PruneMark)>, Error> {
-    let mut mark = HashMap::new();
+        Ok(())
+    }
 
-    BackupInfo::sort_list(&mut list, false);
+    fn remove_incomplete_snapshots(mark: &mut HashMap<PathBuf, PruneMark>, list: &[BackupInfo]) {
+        let mut keep_unfinished = true;
+        for info in list.iter() {
+            // backup is considered unfinished if there is no manifest
+            if info.is_finished() {
+                // There is a new finished backup, so there is no need
+                // to keep older unfinished backups.
+                keep_unfinished = false;
+            } else {
+                let backup_id = info.backup_dir.relative_path();
+                if keep_unfinished {
+                    // keep first unfinished
+                    mark.insert(backup_id, PruneMark::KeepPartial);
+                } else {
+                    mark.insert(backup_id, PruneMark::Remove);
+                }
+                keep_unfinished = false;
+            }
+        }
+    }
 
-    remove_incomplete_snapshots(&mut mark, &list);
+    /// This filters incomplete and kept backups.
+    pub fn compute_prune_info(
+        mut list: Vec<BackupInfo>,
+        options: &KeepOptions,
+    ) -> Result<Vec<(BackupInfo, PruneMark)>, Error> {
+        let mut mark = HashMap::new();
 
-    if let Some(keep_last) = options.keep_last {
-        mark_selections(&mut mark, &list, keep_last as usize, |info| {
-            Ok(info.backup_dir.backup_time_string().to_owned())
-        })?;
-    }
+        BackupInfo::sort_list(&mut list, false);
 
-    use proxmox_time::strftime_local;
+        Self::remove_incomplete_snapshots(&mut mark, &list);
 
-    if let Some(keep_hourly) = options.keep_hourly {
-        mark_selections(&mut mark, &list, keep_hourly as usize, |info| {
-            strftime_local("%Y/%m/%d/%H", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_last) = options.keep_last {
+            Self::mark_selections(&mut mark, &list, keep_last as usize, |info| {
+                Ok(info.backup_dir.backup_time_string().to_owned())
+            })?;
+        }
 
-    if let Some(keep_daily) = options.keep_daily {
-        mark_selections(&mut mark, &list, keep_daily as usize, |info| {
-            strftime_local("%Y/%m/%d", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        use proxmox_time::strftime_local;
 
-    if let Some(keep_weekly) = options.keep_weekly {
-        mark_selections(&mut mark, &list, keep_weekly as usize, |info| {
-            // Note: Use iso-week year/week here. This year number
-            // might not match the calendar year number.
-            strftime_local("%G/%V", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_hourly) = options.keep_hourly {
+            Self::mark_selections(&mut mark, &list, keep_hourly as usize, |info| {
+                strftime_local("%Y/%m/%d/%H", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    if let Some(keep_monthly) = options.keep_monthly {
-        mark_selections(&mut mark, &list, keep_monthly as usize, |info| {
-            strftime_local("%Y/%m", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_daily) = options.keep_daily {
+            Self::mark_selections(&mut mark, &list, keep_daily as usize, |info| {
+                strftime_local("%Y/%m/%d", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    if let Some(keep_yearly) = options.keep_yearly {
-        mark_selections(&mut mark, &list, keep_yearly as usize, |info| {
-            strftime_local("%Y", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_weekly) = options.keep_weekly {
+            Self::mark_selections(&mut mark, &list, keep_weekly as usize, |info| {
+                // Note: Use iso-week year/week here. This year number
+                // might not match the calendar year number.
+                strftime_local("%G/%V", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    let prune_info: Vec<(BackupInfo, PruneMark)> = list
-        .into_iter()
-        .map(|info| {
-            let backup_id = info.backup_dir.relative_path();
-            let mark = if info.protected {
-                PruneMark::Protected
-            } else {
-                mark.get(&backup_id).copied().unwrap_or(PruneMark::Remove)
-            };
+        if let Some(keep_monthly) = options.keep_monthly {
+            Self::mark_selections(&mut mark, &list, keep_monthly as usize, |info| {
+                strftime_local("%Y/%m", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-            (info, mark)
-        })
-        .collect();
+        if let Some(keep_yearly) = options.keep_yearly {
+            Self::mark_selections(&mut mark, &list, keep_yearly as usize, |info| {
+                strftime_local("%Y", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    Ok(prune_info)
+        let prune_info: Vec<(BackupInfo, PruneMark)> = list
+            .into_iter()
+            .map(|info| {
+                let backup_id = info.backup_dir.relative_path();
+                let mark = if info.protected {
+                    PruneMark::Protected
+                } else {
+                    mark.get(&backup_id).copied().unwrap_or(PruneMark::Remove)
+                };
+
+                (info, mark)
+            })
+            .collect();
+
+        Ok(prune_info)
+    }
 }
diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index c8e86b07..6797844e 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -52,7 +52,7 @@ use pbs_datastore::dynamic_index::{BufferedDynamicReader, DynamicIndexReader, Lo
 use pbs_datastore::fixed_index::FixedIndexReader;
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME};
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::{
     check_backup_owner, task_tracking, BackupDir, BackupGroup, DataStore, LocalChunkReader,
     StoreProgress, CATALOG_NAME,
@@ -982,7 +982,7 @@ pub fn prune(
     let list = group.list_backups()?;
 
-    let mut prune_info = compute_prune_info(list, &keep_options)?;
+    let mut prune_info = PruneJob::compute_prune_info(list, &keep_options)?;
     prune_info.reverse(); // delete older snapshots first
diff --git a/src/server/prune_job.rs b/src/server/prune_job.rs
index 2de34973..b9f7ebdf 100644
--- a/src/server/prune_job.rs
+++ b/src/server/prune_job.rs
@@ -8,7 +8,7 @@ use pbs_api_types::{
     print_store_and_ns, Authid, KeepOptions, Operation, PruneJobOptions, MAX_NAMESPACE_DEPTH,
     PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
 };
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::DataStore;
 use proxmox_rest_server::WorkerTask;
@@ -58,7 +58,7 @@ pub fn prune_datastore(
         let ns = group.backup_ns();
         let list = group.list_backups()?;
 
-        let mut prune_info = compute_prune_info(list, &prune_options.keep)?;
+        let mut prune_info = PruneJob::compute_prune_info(list, &prune_options.keep)?;
         prune_info.reverse(); // delete older snapshots first
 
         task_log!(
diff --git a/tests/prune.rs b/tests/prune.rs
index 3b320969..47e99841 100644
--- a/tests/prune.rs
+++ b/tests/prune.rs
@@ -4,7 +4,7 @@ use anyhow::Error;
 
 use pbs_api_types::PruneJobOptions;
 use pbs_datastore::manifest::MANIFEST_BLOB_NAME;
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::{BackupDir, BackupInfo};
 
 fn get_prune_list(
@@ -12,7 +12,7 @@ fn get_prune_list(
     return_kept: bool,
     options: &PruneJobOptions,
 ) -> Vec<PathBuf> {
-    let mut prune_info = compute_prune_info(list, &options.keep).unwrap();
+    let mut prune_info = PruneJob::compute_prune_info(list, &options.keep).unwrap();
     prune_info.reverse();
-- 
2.30.2