From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 35EBAD6D7 for ; Mon, 21 Aug 2023 10:41:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 1EB5E14A89 for ; Mon, 21 Aug 2023 10:41:06 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 21 Aug 2023 10:41:05 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DE4E440E25 for ; Mon, 21 Aug 2023 10:41:04 +0200 (CEST) Date: Mon, 21 Aug 2023 10:41:03 +0200 From: Wolfgang Bumiller To: Gabriel Goller Cc: pbs-devel@lists.proxmox.com Message-ID: References: <20230818143212.91113-1-g.goller@proxmox.com> <20230818143212.91113-2-g.goller@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20230818143212.91113-2-g.goller@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.104 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v5] fix #4380: check if file is excluded before running `stat()` X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Aug 2023 08:41:36 -0000 needs a rebase, and comments inline: On Fri, Aug 18, 2023 at 04:32:12PM +0200, Gabriel Goller wrote: > Passed a closure with the `stat()` function call to `matches()`. This > will traverse through all patterns and try to match using the path only, if a > `file_mode` is needed, it will run the closure. This means that if we exclude > a file with the `MatchType::ANY_FILE_TYPE`, we will skip it without running > `stat()` on it. > Added `pathpatterns` crate to local overrides in cargo.toml. > > Signed-off-by: Gabriel Goller > --- > > changes v4: > - match only by path and exclude the matched files, the run `stat()` and > match again, this time using the `file_mode`. This will match everything > twice in the worst case, which is not optimal. > changes v3: > - checking for `read` and `execute` permissions before entering directory, > doesn't work because there are a lot of side-effects (executed by > different user, AppArmor, SELinux, ...). > changes v2: > - checking for excluded files with `matches()` before executing `stat()`, > this doesn't work because we get the file_mode from `stat()` and don't > want to ignore it when matching. > > Cargo.toml | 1 + > pbs-client/src/catalog_shell.rs | 12 +++--- > pbs-client/src/pxar/create.rs | 69 ++++++++++++++++++++++++--------- > pbs-client/src/pxar/extract.rs | 9 +++-- > pbs-datastore/src/catalog.rs | 6 +-- > 5 files changed, 65 insertions(+), 32 deletions(-) > > diff --git a/Cargo.toml b/Cargo.toml > index 2a05c471..c54bc28b 100644 > --- a/Cargo.toml > +++ b/Cargo.toml > @@ -262,6 +262,7 @@ proxmox-rrd.workspace = true > > #proxmox-apt = { path = "../proxmox-apt" } > #proxmox-openid = { path = "../proxmox-openid-rs" } > +#pathpatterns = {path = "../pathpatterns" } > > #pxar = { path = "../pxar" } > > diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs > index 98af5699..89bcc280 100644 > --- a/pbs-client/src/catalog_shell.rs > +++ b/pbs-client/src/catalog_shell.rs > @@ -13,7 +13,7 @@ use nix::dir::Dir; > use nix::fcntl::OFlag; > use nix::sys::stat::Mode; > > -use pathpatterns::{MatchEntry, MatchList, MatchPattern, MatchType, PatternFlag}; > +use pathpatterns::{FileModeRequired, MatchEntry, MatchList, MatchPattern, MatchType, PatternFlag}; > use proxmox_router::cli::{self, CliCommand, CliCommandMap, CliHelper, CommandLineInterface}; > use proxmox_schema::api; > use proxmox_sys::fs::{create_path, CreateOptions}; > @@ -1108,7 +1108,7 @@ impl<'a> ExtractorState<'a> { > async fn handle_new_directory( > &mut self, > entry: catalog::DirEntry, > - match_result: Option, > + match_result: Result, FileModeRequired>, ^ This is not needed, let's not take errors with us throughout the process. This is our catalog shell. The catalog file always contains file types for its contents except for hardlinks, since we cannot resolve them. However, hardlinks also cannot recurse down any further, since they're also never directories, so `handle_new_directory` will never actually get `Err()` passed here. > ) -> Result<(), Error> { > // enter a new directory: > self.read_dir_stack.push(mem::replace( > @@ -1123,7 +1123,7 @@ impl<'a> ExtractorState<'a> { > Shell::walk_pxar_archive(self.accessor, &mut self.dir_stack).await?; > let dir_pxar = self.dir_stack.last().unwrap().pxar.as_ref().unwrap(); > let dir_meta = dir_pxar.entry().metadata().clone(); > - let create = self.matches && match_result != Some(MatchType::Exclude); > + let create = self.matches && match_result != Ok(Some(MatchType::Exclude)); ^ so no need for this change > self.extractor > .enter_directory(dir_pxar.file_name().to_os_string(), dir_meta, create)?; > > @@ -1133,9 +1133,9 @@ impl<'a> ExtractorState<'a> { > pub async fn handle_entry(&mut self, entry: catalog::DirEntry) -> Result<(), Error> { > let match_result = self.match_list.matches(&self.path, entry.get_file_mode()); I'd like to point out that if we don't know what to do, we should fail, so, just use `?` here. Since this is an interactive tool, we *may* get away with just logging this and continuing with the matches you wrote out below here. But we should not just be silent about it :-) Personally, I'm fine with erroring and telling the user to investigate. In practice I doubt anyone will really run into this. The catalog shell is too awkward to have many users (I think???) > let did_match = match match_result { > - Some(MatchType::Include) => true, > - Some(MatchType::Exclude) => false, > - None => self.matches, > + Ok(Some(MatchType::Include)) => true, > + Ok(Some(MatchType::Exclude)) => false, > + _ => self.matches, > }; > > match (did_match, &entry.attr) { > diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs > index 2577cf98..4a844599 100644 > --- a/pbs-client/src/pxar/create.rs > +++ b/pbs-client/src/pxar/create.rs > @@ -21,7 +21,6 @@ use pxar::Metadata; > > use proxmox_io::vec; > use proxmox_lang::c_str; > -use proxmox_sys::error::SysError; > use proxmox_sys::fs::{self, acl, xattr}; > > use pbs_datastore::catalog::BackupCatalogWriter; > @@ -420,7 +419,7 @@ impl Archiver { > for file in dir.iter() { > let file = file?; > > - let file_name = file.file_name().to_owned(); > + let file_name = file.file_name(); > let file_name_bytes = file_name.to_bytes(); > if file_name_bytes == b"." || file_name_bytes == b".." { > continue; > @@ -434,25 +433,57 @@ impl Archiver { > assert_single_path_component(os_file_name)?; > let full_path = self.path.join(os_file_name); > > - let stat = match nix::sys::stat::fstatat( > - dir_fd, > - file_name.as_c_str(), > - nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, > - ) { > - Ok(stat) => stat, > - Err(ref err) if err.not_found() => continue, > - Err(err) => return Err(err).context(format!("stat failed on {:?}", full_path)), > - }; > - > let match_path = PathBuf::from("/").join(full_path.clone()); > - if self > + > + let mut stat_results: Option = None; Please factor the duplicate `fstatat` call out up here into a closure let get_file_mode = || fstatat(...); > + > + let stat = self ^ stat is not a good name for this. This can stay an `if` as before since the condition doesn't need to be this huge. > .patterns > - .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) > - == Some(MatchType::Exclude) > - { > + .matches( > + match_path.as_os_str().as_bytes(), > + || match nix::sys::stat::fstatat( > + dir_fd, > + file_name.to_owned().as_c_str(), > + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, > + ) { > + Ok(stat) => { > + stat_results = Some(stat); > + Ok(stat.st_mode) > + } > + Err(e) => Err(e), > + }, > + ) ^ this block would just be .matches(match_path.as_os_str().as_bytes(), || { Ok::<_, Errno>(match &stat_results { Some(result) => result.st_mode, None => stat_results.insert(get_file_mode()?).st_mode, }) }) > + .map_err(|err| { > + anyhow::Error::new(err).context(format!( > + "stat failed on {:?} with {:?}", > + full_path, > + err.desc() When switching to 'context', there's no need to include the error in the message, it just causes duplicate text in the output. > + )) > + })?; ^ We have both `Error` and `Context` in imported. The entire `.map_err` can be turned into just: .with_context(|| format!("stat failed on {full_path:?}"))? > + > + if stat == Some(MatchType::Exclude) { > continue; > } > > + if stat_results.is_none() { > + match nix::sys::stat::fstatat( > + dir_fd, > + file_name.to_owned().as_c_str(), > + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, > + ) { > + Ok(stat) => { > + stat_results = Some(stat); > + } > + Err(err) => { > + return Err(err).context(format!( > + "stat failed on {:?} with {:?}", > + full_path, > + err.desc() > + )); > + } > + } > + } ^ Instead of this large block we then only need: let stat = stat_results.map(Ok).unwrap_or_else(get_file_mode)?; > + > self.entry_counter += 1; > if self.entry_counter > self.entry_limit { > bail!( > @@ -462,9 +493,9 @@ impl Archiver { > } > > file_list.push(FileListEntry { > - name: file_name, > + name: file_name.to_owned(), > path: full_path, > - stat, > + stat: stat_results.unwrap(), ^ and this change can just be dropped :-) > }); > } > > @@ -534,7 +565,7 @@ impl Archiver { > if self > .patterns > .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) > - == Some(MatchType::Exclude) > + == Ok(Some(MatchType::Exclude)) ^ Please don't just ignore errors, just put a `?` onto the call. On the one hand we know that this cannot error since we pass `Some`, so we could even unwrap, but that's worse, since there's no guarantee that no other errors can happen. We *may* be able to guarantee this by adding a `GetFileMode` impl in pathpatterns directly to `u32` with `type Error = std::convert::Infallible`, and then drop the `Some()` wrapping around this piece? > { > return Ok(()); > } > diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs > index 4dbaf52d..e08af3c0 100644 > --- a/pbs-client/src/pxar/extract.rs > +++ b/pbs-client/src/pxar/extract.rs > @@ -244,16 +244,17 @@ where > ); > > let did_match = match match_result { > - Some(MatchType::Include) => true, > - Some(MatchType::Exclude) => false, > - None => self.state.current_match, > + Ok(Some(MatchType::Include)) => true, > + Ok(Some(MatchType::Exclude)) => false, > + _ => self.state.current_match, Same here, 3 lines above we pass `Some(file_type)` to `matches()`. We should not just use `_` here, but rather, the above should fail. > }; > > let extract_res = match (did_match, entry.kind()) { > (_, EntryKind::Directory) => { > self.callback(entry.path()); > > - let create = self.state.current_match && match_result != Some(MatchType::Exclude); > + let create = > + self.state.current_match && match_result != Ok(Some(MatchType::Exclude)); > let res = self > .extractor > .enter_directory(file_name_os.to_owned(), metadata.clone(), create) > diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs > index 11c14b64..86e20c92 100644 > --- a/pbs-datastore/src/catalog.rs > +++ b/pbs-datastore/src/catalog.rs > @@ -678,9 +678,9 @@ impl CatalogReader { > } > file_path.extend(&e.name); > match match_list.matches(&file_path, e.get_file_mode()) { > - Some(MatchType::Exclude) => continue, > - Some(MatchType::Include) => callback(file_path)?, > - None => (), > + Ok(Some(MatchType::Exclude)) => continue, > + Ok(Some(MatchType::Include)) => callback(file_path)?, > + _ => (), > } > if is_dir { > self.find(&e, file_path, match_list, callback)?; > -- > 2.39.2