From: Lukas Wagner
To: Proxmox Backup Server development discussion, Philipp Hufnagl
Date: Thu, 14 Dec 2023 17:22:47 +0100
Message-ID: <1f3aa0a6-9cad-4e39-8ac3-5c75262d2f07@proxmox.com>
In-Reply-To: <20231204100414.152770-2-p.hufnagl@proxmox.com>
References: <20231204100414.152770-1-p.hufnagl@proxmox.com>
 <20231204100414.152770-2-p.hufnagl@proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v4 1/3] fix #4315: jobs: modify GroupFilter so include/exclude is tracked

Hi Philipp,

some comments inline :)

On 12/4/23 11:04, Philipp Hufnagl wrote:
> After some discussion I changed the include/exclude behavior to first run
> all include filters and after that all exclude filters (rather than
> allowing to alternate in between). This is simply done by sorting the
> list (include first) before executing it.
>
> Since a GroupFilter now also features a behavior, the struct has been
> renamed to GroupType (since simply 'type' is a keyword). The new
> GroupFilter now carries the behaviour as a flag 'is_exclude'.
>
> I considered calling it 'is_include', but a reader might later not
> know what the opposite of 'include' is (do not include? deactivate?). I
> also considered making a new enum 'behaviour', but since there are only 2
> values I considered it over-engineered.
>
> Matching a filter will now iterate with a foreach loop in order to also
> exclude matches.
>
> Signed-off-by: Philipp Hufnagl
> ---
>  pbs-api-types/src/datastore.rs | 11 +++---
>  pbs-api-types/src/jobs.rs | 64 +++++++++++++++++++++++++++-------
>  src/api2/pull.rs | 11 +++++-
>  src/api2/tape/backup.rs | 17 +++++++--
>  src/server/pull.rs | 23 +++++++++---
>  5 files changed, 99 insertions(+), 27 deletions(-)
>
> diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
> index d4ead1d1..c8f26b57 100644
> --- a/pbs-api-types/src/datastore.rs
> +++ b/pbs-api-types/src/datastore.rs
> @@ -843,17 +843,16 @@ impl BackupGroup {
>      }
>
>      pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
> -        use crate::GroupFilter;
> -
> -        match filter {
> -            GroupFilter::Group(backup_group) => {
> +        use crate::FilterType;
> +        match &filter.filter_type {
> +            FilterType::Group(backup_group) => {
>                  match backup_group.parse::() {
>                      Ok(group) => *self == group,
>                      Err(_) => false, // shouldn't happen if value is schema-checked
>                  }
>              }
> -            GroupFilter::BackupType(ty) => self.ty == *ty,
> -            GroupFilter::Regex(regex) => regex.is_match(&self.to_string()),
> +            FilterType::BackupType(ty) => self.ty == *ty,
> +            FilterType::Regex(regex) => regex.is_match(&self.to_string()),
>          }
>      }
>  }
>
> diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
> index 1f5b3cf1..dff02395 100644
> --- a/pbs-api-types/src/jobs.rs
> +++ b/pbs-api-types/src/jobs.rs
> @@ -3,6 +3,7 @@ use std::str::FromStr;
>
>  use regex::Regex;
>  use serde::{Deserialize, Serialize};
> +use std::cmp::Ordering;
>
>  use proxmox_schema::*;
>
> @@ -388,7 +389,7 @@ pub struct TapeBackupJobStatus {
>
>  #[derive(Clone, Debug)]
>  /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`.
> -pub enum GroupFilter {
> +pub enum FilterType {
>      /// BackupGroup type - either `vm`, `ct`, or `host`.
>      BackupType(BackupType),
>      /// Full identifier of BackupGroup, including type
> @@ -397,7 +398,7 @@ pub enum GroupFilter {
>      Regex(Regex),
>  }
>
> -impl PartialEq for GroupFilter {
> +impl PartialEq for FilterType {
>      fn eq(&self, other: &Self) -> bool {
>          match (self, other) {
>              (Self::BackupType(a), Self::BackupType(b)) => a == b,
> @@ -408,27 +409,64 @@ impl PartialEq for GroupFilter {
>          }
>      }
>  }
>
> +#[derive(Clone, Debug)]
> +pub struct GroupFilter {
> +    pub is_exclude: bool,
> +    pub filter_type: FilterType,
> +}
> +
> +impl PartialEq for GroupFilter {
> +    fn eq(&self, other: &Self) -> bool {
> +        self.filter_type == other.filter_type && self.is_exclude == other.is_exclude
> +    }
> +}
> +
> +impl Eq for GroupFilter {}
> +
> +impl PartialOrd for GroupFilter {
> +    fn partial_cmp(&self, other: &Self) -> Option {
> +        self.is_exclude.partial_cmp(&other.is_exclude)
> +    }
> +}
> +
> +impl Ord for GroupFilter {
> +    fn cmp(&self, other: &Self) -> Ordering {
> +        self.is_exclude.cmp(&other.is_exclude)
> +    }
> +}

Having Ord/PartialOrd based on the exclude flag is extremely confusing,
please don't do this. See later comments for a less confusing way.
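
To make the problem concrete: PartialEq compares both fields, but the
ordering only looks at `is_exclude`, so two filters that are not equal
still compare as `Ordering::Equal` - which breaks the consistency the std
docs ask for between PartialEq and PartialOrd/Ord. Standalone toy example
(simplified types, `String` standing in for the real FilterType):

    use std::cmp::Ordering;

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Filter {
        is_exclude: bool,
        filter_type: String, // stand-in for the real FilterType
    }

    impl PartialOrd for Filter {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    impl Ord for Filter {
        // like in the patch: ordering only considers the exclude flag
        fn cmp(&self, other: &Self) -> Ordering {
            self.is_exclude.cmp(&other.is_exclude)
        }
    }

    fn main() {
        let a = Filter { is_exclude: false, filter_type: "type:vm".into() };
        let b = Filter { is_exclude: false, filter_type: "type:ct".into() };
        assert!(a != b);                        // not equal ...
        assert_eq!(a.cmp(&b), Ordering::Equal); // ... but the ordering says "Equal"
    }

Sorting happens to work for the include-before-exclude trick, but anybody
reading the type later will be surprised by such an ordering.
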
> +
>  impl std::str::FromStr for GroupFilter {
>      type Err = anyhow::Error;
>
>      fn from_str(s: &str) -> Result {
> -        match s.split_once(':') {
> -            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())),
> -            Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)),
> -            Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)),
> +        let (is_exclude, type_str) = match s.split_once(':') {
> +            Some(("include", value)) => (false, value),
> +            Some(("exclude", value)) => (true, value),
> +            _ => (false, s),
> +        };
> +
> +        let filter_type = match type_str.split_once(':') {
> +            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())),
> +            Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)),
> +            Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)),
>              Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)),
>              None => Err(format_err!("input doesn't match expected format '|regex:REGEX>'")),
> -        }.map_err(|err| format_err!("'{}' - {}", s, err))
> +        }?;

Is there a reason why you change the error format here?

> +        Ok(GroupFilter {
> +            is_exclude,
> +            filter_type,
> +        })
>      }
>  }
>
>  // used for serializing below, caution!
>  impl std::fmt::Display for GroupFilter {
>      fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
> -        match self {
> -            GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type),
> -            GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group),
> -            GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()),
> +        let exclude = if self.is_exclude { "exclude:" } else { "" };
> +        match &self.filter_type {
> +            FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type),
> +            FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group),
> +            FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()),
>          }
>      }
>  }
> @@ -441,9 +479,9 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> {
>  }
>
>  pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
> -    "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE').")
> +    "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE'). Can be inverted by adding 'exclude:' before.")

'adding ... before' sounds a bit odd - maybe "Can be inverted by prepending
'exclude:'" would be better here? Also, the 'include:' prefix is not
documented here.

>      .format(&ApiStringFormat::VerifyFn(verify_group_filter))
> -    .type_text("|group:GROUP|regex:RE>")
> +    .type_text("[]|group:GROUP|regex:RE>")
>      .schema();
>
>  pub const GROUP_FILTER_LIST_SCHEMA: Schema =
> diff --git a/src/api2/pull.rs b/src/api2/pull.rs
> index eb9a2199..f174926c 100644
> --- a/src/api2/pull.rs
> +++ b/src/api2/pull.rs
> @@ -72,6 +72,15 @@ impl TryFrom<&SyncJobConfig> for PullParameters {
>      type Error = Error;
>
>      fn try_from(sync_job: &SyncJobConfig) -> Result {
> +        let filters = match &sync_job.group_filter {
> +            Some(v) => {
> +                let mut f = v.clone();
> +                f.sort();
> +                Some(f)
> +            }
> +            None => None,
> +        };
> +

I don't think that .sort()'ing is a good way to separate include/exclude
groups. PartialEq/PartialOrd/Ord being only based on the exclude flag is
extremely confusing.

Rather split the GroupFilter into two groups manually via a helper (since
you need to do it in multiple places), based on the exclude flag. Then,
first process the includes and subtract the excludes afterwards. I'd do
that at [1].
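
Untested sketch of what I mean - the helper name (`split_filters`) and
whether it returns references or clones is up to you:

    /// Split a flat filter list into (include, exclude) filters.
    fn split_filters(filters: &[GroupFilter]) -> (Vec<&GroupFilter>, Vec<&GroupFilter>) {
        filters.iter().partition(|filter| !filter.is_exclude)
    }

With that, the .sort() call and the Ord/PartialOrd impls are not needed at
all.
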
>          PullParameters::new(
>              &sync_job.store,
>              sync_job.ns.clone().unwrap_or_default(),
> @@ -85,7 +94,7 @@ impl TryFrom<&SyncJobConfig> for PullParameters {
>                  .clone(),
>              sync_job.remove_vanished,
>              sync_job.max_depth,
> -            sync_job.group_filter.clone(),
> +            filters,
>              sync_job.limit.clone(),
>              sync_job.transfer_last,
>          )
> diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
> index 2f9385a7..80dcdd1d 100644
> --- a/src/api2/tape/backup.rs
> +++ b/src/api2/tape/backup.rs
> @@ -412,14 +412,25 @@ fn backup_worker(
>      group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
>
>      let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter {
> -        let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| {
> -            group_filters.iter().any(|filter| group.matches(filter))
> +        let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter], start_with: bool| {
> +            let mut is_match = start_with;
> +            for filter in group_filters.iter() {

I think calling .iter() is not necessary here - you can iterate over the
slice directly.

> +                if group.matches(filter) {
> +                    is_match = !filter.is_exclude;
> +                }
> +            }
> +            is_match
>          };
>
>          let group_count_full = group_list.len();
> +        // if there are only exclude filter, inculude everything

Typo: 'inculude' -> 'include'.

> +        let mut include_all = false;
> +        if !group_filters.is_empty() || group_filters.first().unwrap().is_exclude {
> +            include_all = true;
> +        }

I think the logic is off here. If group_filters contains only INCLUDE
filters, we only want to include those groups. But .is_empty() returns
false, we invert that to true, and include_all is set to true ... which is
not what we want.

Just to illustrate the different cases:
  - no filters: All groups
  - only include filters: ONLY the included ones
  - only exclude filters: ALL BUT the excluded ones
  - both: ONLY the included ones, minus the excluded ones

----

[1]: I would split the GroupFilters into includes/excludes here.

>          let list: Vec = group_list
>              .into_iter()
> -            .filter(|group| filter_fn(group, group_filters))
> +            .filter(|group| filter_fn(group, group_filters, include_all))
>              .collect();
>          let group_count = list.len();
>          task_log!(
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index 3b71c156..027194a1 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -1368,15 +1368,26 @@ pub(crate) async fn pull_ns(
>          }
>      });
>
> -    let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool {
> -        filters.iter().any(|filter| group.matches(filter))
> +    let apply_filters = |group: &BackupGroup, filters: &[GroupFilter], start_with: bool| -> bool {
> +        let mut is_match = start_with;
> +        for filter in filters.iter() {
> +            if group.matches(filter) {
> +                is_match = !filter.is_exclude;
> +            }
> +        }
> +        is_match
>      };
>
>      let list = if let Some(ref group_filter) = &params.group_filter {
> +        // if there are only exclude filter, inculude everything
> +        let mut include_all = false;
> +        if !group_filter.is_empty() || group_filter.first().unwrap().is_exclude {
> +            include_all = true;
> +        }

Same logic error here.
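
Building on the helper sketched above, the per-group check could then look
roughly like this (untested; `want_group` is just a placeholder name),
which covers all four cases from the list above:

    fn want_group(group: &BackupGroup, filters: &[GroupFilter]) -> bool {
        let (includes, excludes) = split_filters(filters);

        // no include filters at all -> start from "everything included"
        let is_included =
            includes.is_empty() || includes.iter().any(|filter| group.matches(filter));

        // excludes are applied afterwards and always win
        is_included && !excludes.iter().any(|filter| group.matches(filter))
    }

Both backup_worker() and pull_ns() could then simply call
`.filter(|group| want_group(group, group_filters))` without the extra
start_with/include_all parameter.
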
>          let unfiltered_count = list.len();
>          let list: Vec = list
>              .into_iter()
> -            .filter(|group| apply_filters(group, group_filter))
> +            .filter(|group| apply_filters(group, group_filter, include_all))
>              .collect();
>          task_log!(
>              worker,
> @@ -1458,7 +1469,11 @@ pub(crate) async fn pull_ns(
>              continue;
>          }
>          if let Some(ref group_filter) = &params.group_filter {
> -            if !apply_filters(local_group, group_filter) {
> +            let mut include_all = false;
> +            if !group_filter.is_empty() || group_filter.first().unwrap().is_exclude {
> +                include_all = true;
> +            }

Same logic error here.

> +            if !apply_filters(local_group, group_filter, include_all) {
>                  continue;
>              }
>          }

-- 
- Lukas