From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 635C0D040 for ; Thu, 17 Aug 2023 11:01:29 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 44E2C1E5D1 for ; Thu, 17 Aug 2023 11:01:29 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 17 Aug 2023 11:01:27 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 81A1D4081E for ; Thu, 17 Aug 2023 11:01:27 +0200 (CEST) Date: Thu, 17 Aug 2023 11:01:26 +0200 From: Wolfgang Bumiller To: Gabriel Goller Cc: pbs-devel@lists.proxmox.com Message-ID: References: <20230816140330.191947-1-g.goller@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20230816140330.191947-1-g.goller@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.105 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH pathpatterns v2] match_list: added `matches_mode_lazy()`, which only retrieves file mode if necessary X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 17 Aug 2023 09:01:29 -0000 Getting there :-) I'd still like a few changes. On Wed, Aug 16, 2023 at 04:03:29PM +0200, Gabriel Goller wrote: > Added `matches_mode_lazy()` function, which takes a closure that should return the > `file_mode`. The function will go through the patterns and match by path only > until it finds a pattern which does not have `MatchFlag::ANY_FILE_TYPE`, in case > it will call the closure, which will return a `file_mode`. This will ensure that > the closure (which in our case executes `stat()`) will only be run once(at most), > every pattern will only be processed once and that the order of the patterns > will be respected. > > Signed-off-by: Gabriel Goller > --- > > changes v2: > - updated the `matches_path()` function to take a closure and renamed > it to `matches_mode_lazy()`. This allows us to only match once, so we > don't need to match before and after `stat()` (without and with > `file_mode`) anymore. > > changes v1: > - added `matches_path()` function, which matches by path only and returns an > error if the `file_mode` is required. > > src/match_list.rs | 203 +++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 202 insertions(+), 1 deletion(-) > > diff --git a/src/match_list.rs b/src/match_list.rs > index c5b14e0..9914130 100644 > --- a/src/match_list.rs > +++ b/src/match_list.rs > @@ -1,6 +1,6 @@ > //! Helpers for include/exclude lists. > - > use bitflags::bitflags; > +use std::{error::Error, fmt}; > > use crate::PatternFlag; > > @@ -39,6 +39,17 @@ impl Default for MatchFlag { > } > } > > +#[derive(Debug, PartialEq)] > +pub struct FileModeRequired; > + > +impl fmt::Display for FileModeRequired { > + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { > + write!(f, "File mode is required for matching") > + } > +} > + > +impl Error for FileModeRequired {} > + > /// A pattern entry. (Glob patterns or literal patterns.) > // Note: > // For regex we'd likely use the POSIX extended REs via `regexec(3)`, since we're targetting > @@ -304,12 +315,26 @@ impl MatchEntry { > > self.matches_path_exact(path) > } > + These can be dropped, because we can actually avoid the need to call any `matches` functions that would require a mode if we don't actually have a mode. For this purpose, `MatchEntry` should just have a pub fn needs_file_mode(&self) -> bool; (also utilized in its `fn matches_mode()` for less duplicate bitflag handling code) > + /// Check whether the path contains a matching suffix. Returns an error if a file mode is required. > + pub fn matches_path>(&self, path: T) -> Result { > + self.matches_path_do(path.as_ref()) > + } > + > + fn matches_path_do(&self, path: &[u8]) -> Result { > + if !self.flags.contains(MatchFlag::ANY_FILE_TYPE) { > + return Err(FileModeRequired); > + } > + > + Ok(self.matches_path_suffix_do(path)) > + } > } > > #[doc(hidden)] > pub trait MatchListEntry { > fn entry_matches(&self, path: &[u8], file_mode: Option) -> Option; > fn entry_matches_exact(&self, path: &[u8], file_mode: Option) -> Option; > + fn entry_matches_path(&self, path: &[u8]) -> Result, FileModeRequired>; Instead of this, we should expose fn needs_file_mode(&self) -> bool in this trait. > } > > impl MatchListEntry for &'_ MatchEntry { > @@ -328,6 +353,15 @@ impl MatchListEntry for &'_ MatchEntry { > None > } > } > + > + fn entry_matches_path(&self, path: &[u8]) -> Result, FileModeRequired> { > + let matches = self.matches_path(path)?; > + if matches { > + Ok(Some(self.match_type())) > + } else { > + Ok(None) > + } > + } > } > > impl MatchListEntry for &'_ &'_ MatchEntry { > @@ -346,6 +380,15 @@ impl MatchListEntry for &'_ &'_ MatchEntry { > None > } > } > + > + fn entry_matches_path(&self, path: &[u8]) -> Result, FileModeRequired> { > + let matches = self.matches_path(path)?; > + if matches { > + Ok(Some(self.match_type())) > + } else { > + Ok(None) > + } > + } > } > > /// This provides [`matches`](MatchList::matches) and [`matches_exact`](MatchList::matches_exact) > @@ -374,6 +417,22 @@ pub trait MatchList { > } And here would be the main API break. All of the methods here in `MatchList` should act as lazy instead of having this as a new method. The only downside is that this means fixing up all the test cases... ;-) > > fn matches_exact_do(&self, path: &[u8], file_mode: Option) -> Option; > + > + /// Check whether this list contains anything exactly matching the path, returns error if > + /// `file_mode` is required for exact matching and cannot be retrieved using closure. > + fn matches_mode_lazy, E: Error, U: FnMut() -> Result>( Style nit: with large trait bounds, please move them into a `where` clause instead (same in the other cases where you add these). > + &self, > + path: T, > + get_file_mode: U, > + ) -> Result, E> { > + self.matches_mode_lazy_do(path.as_ref(), get_file_mode) > + } > + > + fn matches_mode_lazy_do Result>( > + &self, > + path: &[u8], > + get_file_mode: T, > + ) -> Result, E>; > } > > impl<'a, T> MatchList for T > @@ -408,6 +467,39 @@ where > > None > } > + > + fn matches_mode_lazy_do Result>( `FnOnce` should suffice, to guarantee at the API level that we only call it once. You can make this work by moving it into an `Option` first: let mut get_file_mode = Some(get_file_mode); let mut file_mode = None; for m in ... { if file_mode.is_none() && m.needs_file_mode() { // unwrap: we always have get_file_mode XOR file_mode as Some() file_mode = Some((get_file_mode.take().unwrap())()?); } if let Some(mt) = m.entry_matches(path, file_mode) { return Ok(Some(mt)); } } Note that we just call the regular `entry_matches`, since given `None` as file mode it simply won't bother with the file mode, but at the same time, if the pattern does *want* a file mode, we try to fetch it. Now, as a bonus, we could also make `None` work again as a parameter for these methods by replacing the `FnOnce` parameter with a new small custom trait: pub trait GetFileMode { type Error; fn get(self) -> Result; } With this being our own trait, we can have a generic impl along with a one for `Option`: impl GetFileMode for T where T: FnOnce() -> Result, { type Error = E; fn get(self) -> Result { return self(); } } impl GetFileMode for Option { type Error = FileModeRequired; fn get(self) -> Result { self.ok_or(FileModeRequired) } } ^ otherwise we also won't need the `FileModeRequired` error type ;-) This will also make adapting the test cases much easier (only the `Ok()` around the result types needs to be added, no closures for the modes). > + &self, > + path: &[u8], > + mut get_file_mode: U, > + ) -> Result, E> { > + // This is an &self method on a `T where T: 'a`. > + let this: &'a Self = unsafe { std::mem::transmute(self) }; > + > + let mut file_mode: Option = None; > + for m in this.into_iter().rev() { > + if file_mode.is_none() { > + match m.entry_matches_path(path) { > + Ok(mt) => { > + if mt.is_some() { > + return Ok(mt); > + } > + } > + Err(_) => match get_file_mode() { > + Ok(x) => file_mode = Some(x), > + Err(e) => return Err(e), > + }, > + } > + } > + if file_mode.is_some() { > + let full_match = m.entry_matches(path, file_mode); > + if full_match.is_some() { > + return Ok(full_match); > + } > + } > + } > + Ok(None) > + } > } > > #[test]