From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 31580BC378 for ; Fri, 22 Dec 2023 17:40:27 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 1B4518B91 for ; Fri, 22 Dec 2023 17:40:27 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 22 Dec 2023 17:40:26 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id B913548B4F for ; Fri, 22 Dec 2023 17:40:25 +0100 (CET) From: Philipp Hufnagl To: pbs-devel@lists.proxmox.com Date: Fri, 22 Dec 2023 17:39:59 +0100 Message-Id: <20231222164002.455227-2-p.hufnagl@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20231222164002.455227-1-p.hufnagl@proxmox.com> References: <20231222164002.455227-1-p.hufnagl@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.040 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH proxmox-backup v6 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked X-BeenThere: pbs-devel@lists.proxmox.com 
X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 22 Dec 2023 16:40:27 -0000 After some discussion I changed the include/exclude behavior to first run all include filters and after that all exclude filters (rather than allowing them to alternate in between). This is done by splitting them into 2 lists, running include first. A lot of discussion happened about how edge cases should be handled, and we came to the following conclusion: no include filter + no exclude filter => include all some include filter + no exclude filter => filter as always no include filter + some exclude filter => include all then exclude Since a GroupFilter now also features a behavior, the struct has been renamed to FilterType (since simply 'type' is a keyword). The new GroupFilter now has a behavior as a flag 'is_exclude'. I considered calling it 'is_include', but a reader later on might not know what the opposite of 'include' is (do not include? deactivate?). I also considered making a new enum 'behaviour', but since there are only 2 values I considered it over-engineered. 
Signed-off-by: Philipp Hufnagl --- pbs-api-types/src/datastore.rs | 39 ++++++++++++++++++++------ pbs-api-types/src/jobs.rs | 51 +++++++++++++++++++++++++--------- src/api2/tape/backup.rs | 39 +++++++++++--------------- src/server/pull.rs | 46 +++++++++++++----------------- 4 files changed, 103 insertions(+), 72 deletions(-) diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs index 74f610d1..d3d10a3a 100644 --- a/pbs-api-types/src/datastore.rs +++ b/pbs-api-types/src/datastore.rs @@ -10,9 +10,9 @@ use proxmox_schema::{ }; use crate::{ - Authid, CryptMode, Fingerprint, MaintenanceMode, Userid, DATASTORE_NOTIFY_STRING_SCHEMA, - GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT, PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX, - SINGLE_LINE_COMMENT_SCHEMA, UPID, + Authid, CryptMode, Fingerprint, GroupFilter, MaintenanceMode, Userid, + DATASTORE_NOTIFY_STRING_SCHEMA, GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT, + PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX, SINGLE_LINE_COMMENT_SCHEMA, UPID, }; const_regex! 
{ @@ -843,19 +843,40 @@ impl BackupGroup { } pub fn matches(&self, filter: &crate::GroupFilter) -> bool { - use crate::GroupFilter; - - match filter { - GroupFilter::Group(backup_group) => { + use crate::FilterType; + match &filter.filter_type { + FilterType::Group(backup_group) => { match backup_group.parse::() { Ok(group) => *self == group, Err(_) => false, // shouldn't happen if value is schema-checked } } - GroupFilter::BackupType(ty) => self.ty == *ty, - GroupFilter::Regex(regex) => regex.is_match(&self.to_string()), + FilterType::BackupType(ty) => self.ty == *ty, + FilterType::Regex(regex) => regex.is_match(&self.to_string()), } } + + pub fn apply_filters(&self, filters: &[GroupFilter]) -> bool { + let mut is_match: bool; + let includes: Vec<&GroupFilter> = filters.iter().filter(|f| !f.is_exclude).collect(); + let excludes: Vec<&GroupFilter> = filters.iter().filter(|f| f.is_exclude).collect(); + + if includes.is_empty() { + is_match = true; + } else { + is_match = includes.iter().any(|filter| self.matches(filter)); + } + + // only run exclude if includes matched + if is_match { + is_match = !excludes + .iter() + .filter(|f| f.is_exclude) + .any(|filter| self.matches(filter)); + } + + is_match + } } impl AsRef for BackupGroup { diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs index 1f5b3cf1..607451ff 100644 --- a/pbs-api-types/src/jobs.rs +++ b/pbs-api-types/src/jobs.rs @@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus { #[derive(Clone, Debug)] /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`. -pub enum GroupFilter { +pub enum FilterType { /// BackupGroup type - either `vm`, `ct`, or `host`. 
BackupType(BackupType), /// Full identifier of BackupGroup, including type @@ -397,7 +397,7 @@ pub enum GroupFilter { Regex(Regex), } -impl PartialEq for GroupFilter { +impl PartialEq for FilterType { fn eq(&self, other: &Self) -> bool { match (self, other) { (Self::BackupType(a), Self::BackupType(b)) => a == b, @@ -408,27 +408,52 @@ impl PartialEq for GroupFilter { } } +#[derive(Clone, Debug)] +pub struct GroupFilter { + pub is_exclude: bool, + pub filter_type: FilterType, +} + +impl PartialEq for GroupFilter { + fn eq(&self, other: &Self) -> bool { + self.filter_type == other.filter_type && self.is_exclude == other.is_exclude + } +} + +impl Eq for GroupFilter {} + impl std::str::FromStr for GroupFilter { type Err = anyhow::Error; fn from_str(s: &str) -> Result { - match s.split_once(':') { - Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())), - Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)), - Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)), + let (is_exclude, type_str) = match s.split_once(':') { + Some(("include", value)) => (false, value), + Some(("exclude", value)) => (true, value), + _ => (false, s), + }; + + let filter_type = match type_str.split_once(':') { + Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())), + Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)), + Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)), Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)), None => Err(format_err!("input doesn't match expected format '|regex:REGEX>'")), - }.map_err(|err| format_err!("'{}' - {}", s, err)) + }?; + Ok(GroupFilter { + is_exclude, + filter_type, + }) } } // used for serializing below, caution! 
impl std::fmt::Display for GroupFilter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type), - GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group), - GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()), + let exclude = if self.is_exclude { "exclude:" } else { "" }; + match &self.filter_type { + FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type), + FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group), + FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()), } } } @@ -441,9 +466,9 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> { } pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new( - "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE').") + "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE'). 
Can be inverted by adding 'exclude:' before.") .format(&ApiStringFormat::VerifyFn(verify_group_filter)) - .type_text("|group:GROUP|regex:RE>") + .type_text("[]|group:GROUP|regex:RE>") .schema(); pub const GROUP_FILTER_LIST_SCHEMA: Schema = diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs index 2f9385a7..28d7e720 100644 --- a/src/api2/tape/backup.rs +++ b/src/api2/tape/backup.rs @@ -9,13 +9,13 @@ use proxmox_schema::api; use proxmox_sys::{task_log, task_warn, WorkerTaskContext}; use pbs_api_types::{ - print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation, + print_ns_and_snapshot, print_store_and_ns, Authid, MediaPoolConfig, Operation, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA, }; use pbs_config::CachedUserInfo; -use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo}; +use pbs_datastore::backup_info::{BackupDir, BackupInfo}; use pbs_datastore::{DataStore, StoreProgress}; use proxmox_rest_server::WorkerTask; @@ -411,31 +411,24 @@ fn backup_worker( group_list.sort_unstable_by(|a, b| a.group().cmp(b.group())); - let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter { - let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| { - group_filters.iter().any(|filter| group.matches(filter)) - }; + let group_count_full = group_list.len(); - let group_count_full = group_list.len(); - let list: Vec = group_list + let group_list = match &setup.group_filter { + Some(f) => group_list .into_iter() - .filter(|group| filter_fn(group, group_filters)) - .collect(); - let group_count = list.len(); - task_log!( - worker, - "found {} groups (out of {} total)", - group_count, - group_count_full - ); - (list, group_count) - } else { - let group_count = group_list.len(); - task_log!(worker, "found {} groups", group_count); - (group_list, group_count) + .filter(|group| 
group.group().apply_filters(f)) + .collect(), + None => group_list, }; - let mut progress = StoreProgress::new(group_count as u64); + task_log!( + worker, + "found {} groups (out of {} total)", + group_list.len(), + group_count_full + ); + + let mut progress = StoreProgress::new(group_list.len() as u64); let latest_only = setup.latest_only.unwrap_or(false); diff --git a/src/server/pull.rs b/src/server/pull.rs index 3b71c156..b60f3a1e 100644 --- a/src/server/pull.rs +++ b/src/server/pull.rs @@ -486,7 +486,7 @@ pub(crate) struct PullParameters { /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion) max_depth: Option, /// Filters for reducing the pull scope - group_filter: Option>, + group_filter: Vec, /// How many snapshots should be transferred at most (taking the newest N snapshots) transfer_last: Option, } @@ -539,6 +539,11 @@ impl PullParameters { ns, }; + let group_filter = match group_filter { + Some(f) => f, + None => Vec::::new(), + }; + Ok(Self { source, target, @@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns( ) -> Result<(StoreProgress, bool), Error> { let mut list: Vec = params.source.list_groups(namespace, ¶ms.owner).await?; - let total_count = list.len(); list.sort_unstable_by(|a, b| { let type_order = a.ty.cmp(&b.ty); if type_order == std::cmp::Ordering::Equal { @@ -1368,27 +1372,17 @@ pub(crate) async fn pull_ns( } }); - let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool { - filters.iter().any(|filter| group.matches(filter)) - }; - - let list = if let Some(ref group_filter) = ¶ms.group_filter { - let unfiltered_count = list.len(); - let list: Vec = list - .into_iter() - .filter(|group| apply_filters(group, group_filter)) - .collect(); - task_log!( - worker, - "found {} groups to sync (out of {} total)", - list.len(), - unfiltered_count - ); - list - } else { - task_log!(worker, "found {} groups to sync", total_count); - list - }; + let unfiltered_count = list.len(); + let list: Vec = list 
+ .into_iter() + .filter(|group| group.apply_filters(¶ms.group_filter)) + .collect(); + task_log!( + worker, + "found {} groups to sync (out of {} total)", + list.len(), + unfiltered_count + ); let mut errors = false; @@ -1457,10 +1451,8 @@ pub(crate) async fn pull_ns( if check_backup_owner(&owner, ¶ms.owner).is_err() { continue; } - if let Some(ref group_filter) = ¶ms.group_filter { - if !apply_filters(local_group, group_filter) { - continue; - } + if !local_group.apply_filters(¶ms.group_filter) { + continue; } task_log!(worker, "delete vanished group '{local_group}'",); match params -- 2.39.2