From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id BD556BBCB5 for ; Tue, 19 Dec 2023 14:23:03 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 9DB2237D44 for ; Tue, 19 Dec 2023 14:23:03 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Tue, 19 Dec 2023 14:23:02 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 12ECB485BB for ; Tue, 19 Dec 2023 14:23:02 +0100 (CET) Message-ID: <764d3a02-dce2-4943-a268-9629e49563c2@proxmox.com> Date: Tue, 19 Dec 2023 14:22:59 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird From: Lukas Wagner To: Proxmox Backup Server development discussion , Philipp Hufnagl References: <20231218153638.609440-1-p.hufnagl@proxmox.com> <20231218153638.609440-2-p.hufnagl@proxmox.com> Content-Language: de-AT, en-US In-Reply-To: <20231218153638.609440-2-p.hufnagl@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.004 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - 
URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [pull.rs, backup.rs, jobs.rs, datastore.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup v5 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 19 Dec 2023 13:23:03 -0000 Hey Philipp, some comments inline. I quite like the changes compared to the last version, but I think there are some small aspects that could still be improved before this goes in. On 12/18/23 16:36, Philipp Hufnagl wrote: > After some discussion I canged the include/exclude behavior to first run > all include filter and after that all exclude filter (rather then > allowing to alternate inbetween). This is done by splitting them into 2 > lists, running include first. > > A lot of discussion happened how edge cases should be handled and we > came to following conclusion: > > no include filter + no exclude filter => include all > some include filter + no exclude filter => filter as always > no include filter + some exclude filter => include all then exclude > > Since a GroupFilter now also features an behavior, the Struct has been > renamed To GroupType (since simply type is a keyword). The new > GroupFilter now has a behaviour as a flag 'is_exclude'. > > I considered calling it 'is_include' but a reader later then might not > know what the opposite of 'include' is (do not include? deactivate?). I > also considered making a new enum 'behaviour' but since there are only 2 > values I considered it over engeneered. 
> > Signed-off-by: Philipp Hufnagl > --- > pbs-api-types/src/datastore.rs | 11 ++--- > pbs-api-types/src/jobs.rs | 85 ++++++++++++++++++++++++++++------ > src/api2/tape/backup.rs | 44 +++++++----------- > src/server/pull.rs | 55 ++++++++++------------ > 4 files changed, 118 insertions(+), 77 deletions(-) > > diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs > index 74f610d1..673b5bcd 100644 > --- a/pbs-api-types/src/datastore.rs > +++ b/pbs-api-types/src/datastore.rs > @@ -843,17 +843,16 @@ impl BackupGroup { > } > > pub fn matches(&self, filter: &crate::GroupFilter) -> bool { > - use crate::GroupFilter; > - > - match filter { > - GroupFilter::Group(backup_group) => { > + use crate::FilterType; > + match &filter.filter_type { > + FilterType::Group(backup_group) => { > match backup_group.parse::() { > Ok(group) => *self == group, > Err(_) => false, // shouldn't happen if value is schema-checked > } > } > - GroupFilter::BackupType(ty) => self.ty == *ty, > - GroupFilter::Regex(regex) => regex.is_match(&self.to_string()), > + FilterType::BackupType(ty) => self.ty == *ty, > + FilterType::Regex(regex) => regex.is_match(&self.to_string()), > } > } > } > diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs > index 1f5b3cf1..982f08a6 100644 > --- a/pbs-api-types/src/jobs.rs > +++ b/pbs-api-types/src/jobs.rs > @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; > use proxmox_schema::*; > > use crate::{ > - Authid, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA, > + Authid, BackupGroup, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA, > BACKUP_NAMESPACE_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA, MEDIA_POOL_NAME_SCHEMA, > NS_MAX_DEPTH_REDUCED_SCHEMA, PROXMOX_SAFE_ID_FORMAT, REMOTE_ID_SCHEMA, > SINGLE_LINE_COMMENT_SCHEMA, > @@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus { > > #[derive(Clone, Debug)] > /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`. 
> -pub enum GroupFilter { > +pub enum FilterType { > /// BackupGroup type - either `vm`, `ct`, or `host`. > BackupType(BackupType), > /// Full identifier of BackupGroup, including type > @@ -397,7 +397,7 @@ pub enum GroupFilter { > Regex(Regex), > } > > -impl PartialEq for GroupFilter { > +impl PartialEq for FilterType { > fn eq(&self, other: &Self) -> bool { > match (self, other) { > (Self::BackupType(a), Self::BackupType(b)) => a == b, > @@ -408,27 +408,52 @@ impl PartialEq for GroupFilter { > } > } > > +#[derive(Clone, Debug)] > +pub struct GroupFilter { > + pub is_exclude: bool, > + pub filter_type: FilterType, > +} > + > +impl PartialEq for GroupFilter { > + fn eq(&self, other: &Self) -> bool { > + self.filter_type == other.filter_type && self.is_exclude == other.is_exclude > + } > +} > + > +impl Eq for GroupFilter {} > + > impl std::str::FromStr for GroupFilter { > type Err = anyhow::Error; > > fn from_str(s: &str) -> Result { > - match s.split_once(':') { > - Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())), > - Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)), > - Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)), > + let (is_exclude, type_str) = match s.split_once(':') { > + Some(("include", value)) => (false, value), > + Some(("exclude", value)) => (true, value), > + _ => (false, s), > + }; > + > + let filter_type = match type_str.split_once(':') { > + Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())), > + Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)), > + Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)), > Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)), > None => Err(format_err!("input doesn't match expected format '|regex:REGEX>'")), > - }.map_err(|err| format_err!("'{}' - {}", s, err)) > 
+ }?; > + Ok(GroupFilter { > + is_exclude, > + filter_type, > + }) > } > } > > // used for serializing below, caution! > impl std::fmt::Display for GroupFilter { > fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { > - match self { > - GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type), > - GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group), > - GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()), > + let exclude = if self.is_exclude { "exclude:" } else { "" }; > + match &self.filter_type { > + FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type), > + FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group), > + FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()), > } nit: Some of these format string vars can be inlined, so "{}", foo can become "{foo}". Only works for variables though, since the {} cannot contain arbitrary expressions. Also, just FIY (no need to change that if you don't like it), you can also do the following: "{exclude}regex:{regex}", regex = regex.as_str() It's a bit more verbose, but I quite like this style, especially if all other vars can be inlined. > } > } > @@ -440,10 +465,42 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> { > GroupFilter::from_str(input).map(|_| ()) > } > > +pub fn split_by_include_exclude( > + all_filter: Option>, > +) -> (Vec, Vec) { > + if let Some(all_filter) = all_filter { > + all_filter > + .into_iter() > + .partition(|filter| !filter.is_exclude) > + } else { > + (Vec::new(), Vec::new()) > + } > +} ... 
would not be needed then, see following coment > + > +pub fn apply_filters( > + group: &BackupGroup, > + include_filters: &[GroupFilter], > + exclude_filters: &[GroupFilter], > +) -> bool { > + let mut is_match: bool; > + > + if include_filters.is_empty() { > + is_match = true; > + } else { > + is_match = include_filters.iter().any(|filter| group.matches(filter)); > + } > + > + if !exclude_filters.is_empty() && is_match { > + is_match = !exclude_filters.iter().any(|filter| group.matches(filter)); > + } > + > + is_match > +} > + I prefer to avoid freestanding functions; I think this helper should be a method on the BackupGroup struct and should be named slightly different, e.g.: BackupGroup::matches_filters Also, now that I see the whole code, I think splitting includes/excludes beforehand is actually not really necessary. You could just do a include_filters.iter().filter(|f| !f.is_exclude).any(|f| group.matches(f)) and the same thing with inverted filter condition for the excludes. Also, the `!exclude_filters.is_empty()` is not really needed, since in that case .iter() won't yield any elements anyway ;) > pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new( > - "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE').") > + "Group filter based on group identifier ('group:GROUP'), group type ('type:'), or regex ('regex:RE'). 
Can be inverted by adding 'exclude:' before.") > .format(&ApiStringFormat::VerifyFn(verify_group_filter)) > - .type_text("|group:GROUP|regex:RE>") > + .type_text("[]|group:GROUP|regex:RE>") > .schema(); > > pub const GROUP_FILTER_LIST_SCHEMA: Schema = > diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs > index 2f9385a7..1e2953c4 100644 > --- a/src/api2/tape/backup.rs > +++ b/src/api2/tape/backup.rs > @@ -9,9 +9,9 @@ use proxmox_schema::api; > use proxmox_sys::{task_log, task_warn, WorkerTaskContext}; > > use pbs_api_types::{ > - print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation, > - TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA, > - PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA, > + apply_filters, print_ns_and_snapshot, print_store_and_ns, split_by_include_exclude, Authid, > + MediaPoolConfig, Operation, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, > + Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA, > }; > > use pbs_config::CachedUserInfo; > @@ -411,31 +411,23 @@ fn backup_worker( > > group_list.sort_unstable_by(|a, b| a.group().cmp(b.group())); > > - let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter { > - let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| { > - group_filters.iter().any(|filter| group.matches(filter)) > - }; > + let (include_filter, exclude_filter) = split_by_include_exclude(setup.group_filter.clone()); > > - let group_count_full = group_list.len(); > - let list: Vec = group_list > - .into_iter() > - .filter(|group| filter_fn(group, group_filters)) > - .collect(); > - let group_count = list.len(); > - task_log!( > - worker, > - "found {} groups (out of {} total)", > - group_count, > - group_count_full > - ); > - (list, group_count) > - } else { > - let group_count = group_list.len(); > - task_log!(worker, "found {} groups", 
group_count); > - (group_list, group_count) > - }; > + let group_count_full = group_list.len(); > + // if there are only exclude filter, inculude everything > + let group_list: Vec = group_list > + .into_iter() > + .filter(|group| apply_filters(group.group(), &include_filter, &exclude_filter)) > + .collect(); > + > + task_log!( > + worker, > + "found {} groups (out of {} total)", > + group_list.len(), > + group_count_full > + ); > > - let mut progress = StoreProgress::new(group_count as u64); > + let mut progress = StoreProgress::new(group_list.len() as u64); > > let latest_only = setup.latest_only.unwrap_or(false); > > diff --git a/src/server/pull.rs b/src/server/pull.rs > index 3b71c156..e458008b 100644 > --- a/src/server/pull.rs > +++ b/src/server/pull.rs > @@ -15,9 +15,10 @@ use proxmox_sys::{task_log, task_warn}; > use serde_json::json; > > use pbs_api_types::{ > - print_store_and_ns, Authid, BackupDir, BackupGroup, BackupNamespace, CryptMode, GroupFilter, > - GroupListItem, Operation, RateLimitConfig, Remote, SnapshotListItem, MAX_NAMESPACE_DEPTH, > - PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_READ, > + apply_filters, print_store_and_ns, split_by_include_exclude, Authid, BackupDir, BackupGroup, > + BackupNamespace, CryptMode, GroupFilter, GroupListItem, Operation, RateLimitConfig, Remote, > + SnapshotListItem, MAX_NAMESPACE_DEPTH, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, > + PRIV_DATASTORE_READ, > }; > use pbs_client::{BackupReader, BackupRepository, HttpClient, RemoteChunkReader}; > use pbs_config::CachedUserInfo; > @@ -486,7 +487,8 @@ pub(crate) struct PullParameters { > /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion) > max_depth: Option, > /// Filters for reducing the pull scope > - group_filter: Option>, > + include_filter: Vec, > + exclude_filter: Vec, Nit: better add a second doc comment for exclude_filter, otherwise `cargo doc` won't pick that up > /// How many snapshots should be 
transferred at most (taking the newest N snapshots) > transfer_last: Option, > } > @@ -539,13 +541,16 @@ impl PullParameters { > ns, > }; > > + let (include_filter, exclude_filter) = split_by_include_exclude(group_filter); > + > Ok(Self { > source, > target, > owner, > remove_vanished, > max_depth, > - group_filter, > + include_filter, > + exclude_filter, > transfer_last, > }) > } > @@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns( > ) -> Result<(StoreProgress, bool), Error> { > let mut list: Vec = params.source.list_groups(namespace, ¶ms.owner).await?; > > - let total_count = list.len(); > list.sort_unstable_by(|a, b| { > let type_order = a.ty.cmp(&b.ty); > if type_order == std::cmp::Ordering::Equal { > @@ -1368,27 +1372,18 @@ pub(crate) async fn pull_ns( > } > }); > > - let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool { > - filters.iter().any(|filter| group.matches(filter)) > - }; > - > - let list = if let Some(ref group_filter) = ¶ms.group_filter { > - let unfiltered_count = list.len(); > - let list: Vec = list > - .into_iter() > - .filter(|group| apply_filters(group, group_filter)) > - .collect(); > - task_log!( > - worker, > - "found {} groups to sync (out of {} total)", > - list.len(), > - unfiltered_count > - ); > - list > - } else { > - task_log!(worker, "found {} groups to sync", total_count); > - list > - }; > + // if there are only exclude filter, inculude everything > + let unfiltered_count = list.len(); > + let list: Vec = list > + .into_iter() > + .filter(|group| apply_filters(&group, ¶ms.include_filter, ¶ms.exclude_filter)) > + .collect(); > + task_log!( > + worker, > + "found {} groups to sync (out of {} total)", > + list.len(), > + unfiltered_count > + ); > > let mut errors = false; > > @@ -1457,10 +1452,8 @@ pub(crate) async fn pull_ns( > if check_backup_owner(&owner, ¶ms.owner).is_err() { > continue; > } > - if let Some(ref group_filter) = ¶ms.group_filter { > - if !apply_filters(local_group, group_filter) { > - 
continue; > - } > + if !apply_filters(&local_group, &params.include_filter, &params.exclude_filter) { > + continue; > } > task_log!(worker, "delete vanished group '{local_group}'",); > match params -- - Lukas