From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 9736ED770 for ; Mon, 21 Aug 2023 13:04:30 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 7FC27163EA for ; Mon, 21 Aug 2023 13:04:00 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Mon, 21 Aug 2023 13:03:58 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id C346442CE2 for ; Mon, 21 Aug 2023 13:03:58 +0200 (CEST) Message-ID: Date: Mon, 21 Aug 2023 13:03:56 +0200 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.14.0 Content-Language: en-US To: Wolfgang Bumiller Cc: pbs-devel@lists.proxmox.com References: <20230818143212.91113-1-g.goller@proxmox.com> <20230818143212.91113-2-g.goller@proxmox.com> From: Gabriel Goller In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 1.745 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -4.279 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v5] fix #4380: check if file is excluded before running `stat()` X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Aug 2023 11:04:30 -0000 On 8/21/23 10:41, Wolfgang Bumiller wrote: > needs a rebase, and comments inline: > > On Fri, Aug 18, 2023 at 04:32:12PM +0200, Gabriel Goller wrote: >> Passed a closure with the `stat()` function call to `matches()`. This >> will traverse through all patterns and try to match using the path only, if a >> `file_mode` is needed, it will run the closure. This means that if we exclude >> a file with the `MatchType::ANY_FILE_TYPE`, we will skip it without running >> `stat()` on it. >> Added `pathpatterns` crate to local overrides in cargo.toml. >> >> Signed-off-by: Gabriel Goller >> --- >> >> changes v4: >> - match only by path and exclude the matched files, the run `stat()` and >> match again, this time using the `file_mode`. This will match everything >> twice in the worst case, which is not optimal. >> changes v3: >> - checking for `read` and `execute` permissions before entering directory, >> doesn't work because there are a lot of side-effects (executed by >> different user, AppArmor, SELinux, ...). >> changes v2: >> - checking for excluded files with `matches()` before executing `stat()`, >> this doesn't work because we get the file_mode from `stat()` and don't >> want to ignore it when matching. >> >> Cargo.toml | 1 + >> pbs-client/src/catalog_shell.rs | 12 +++--- >> pbs-client/src/pxar/create.rs | 69 ++++++++++++++++++++++++--------- >> pbs-client/src/pxar/extract.rs | 9 +++-- >> pbs-datastore/src/catalog.rs | 6 +-- >> 5 files changed, 65 insertions(+), 32 deletions(-) >> >> diff --git a/Cargo.toml b/Cargo.toml >> index 2a05c471..c54bc28b 100644 >> --- a/Cargo.toml >> +++ b/Cargo.toml >> @@ -262,6 +262,7 @@ proxmox-rrd.workspace = true >> >> #proxmox-apt = { path = "../proxmox-apt" } >> #proxmox-openid = { path = "../proxmox-openid-rs" } >> +#pathpatterns = {path = "../pathpatterns" } >> >> #pxar = { path = "../pxar" } >> >> diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs >> index 98af5699..89bcc280 100644 >> --- a/pbs-client/src/catalog_shell.rs >> +++ b/pbs-client/src/catalog_shell.rs >> @@ -13,7 +13,7 @@ use nix::dir::Dir; >> use nix::fcntl::OFlag; >> use nix::sys::stat::Mode; >> >> -use pathpatterns::{MatchEntry, MatchList, MatchPattern, MatchType, PatternFlag}; >> +use pathpatterns::{FileModeRequired, MatchEntry, MatchList, MatchPattern, MatchType, PatternFlag}; >> use proxmox_router::cli::{self, CliCommand, CliCommandMap, CliHelper, CommandLineInterface}; >> use proxmox_schema::api; >> use proxmox_sys::fs::{create_path, CreateOptions}; >> @@ -1108,7 +1108,7 @@ impl<'a> ExtractorState<'a> { >> async fn handle_new_directory( >> &mut self, >> entry: catalog::DirEntry, >> - match_result: Option, >> + match_result: Result, FileModeRequired>, > ^ This is not needed, let's not take errors with us throughout the > process. > > This is our catalog shell. The catalog file always contains file types > for its contents except for hardlinks, since we cannot resolve them. > However, hardlinks also cannot recurse down any further, since they're > also never directories, so `handle_new_directory` will never actually > get `Err()` passed here. > >> ) -> Result<(), Error> { >> // enter a new directory: >> self.read_dir_stack.push(mem::replace( >> @@ -1123,7 +1123,7 @@ impl<'a> ExtractorState<'a> { >> Shell::walk_pxar_archive(self.accessor, &mut self.dir_stack).await?; >> let dir_pxar = self.dir_stack.last().unwrap().pxar.as_ref().unwrap(); >> let dir_meta = dir_pxar.entry().metadata().clone(); >> - let create = self.matches && match_result != Some(MatchType::Exclude); >> + let create = self.matches && match_result != Ok(Some(MatchType::Exclude)); > ^ so no need for this change > >> self.extractor >> .enter_directory(dir_pxar.file_name().to_os_string(), dir_meta, create)?; >> >> @@ -1133,9 +1133,9 @@ impl<'a> ExtractorState<'a> { >> pub async fn handle_entry(&mut self, entry: catalog::DirEntry) -> Result<(), Error> { >> let match_result = self.match_list.matches(&self.path, entry.get_file_mode()); > I'd like to point out that if we don't know what to do, we should fail, > so, just use `?` here. > > Since this is an interactive tool, we *may* get away with just logging > this and continuing with the matches you wrote out below here. > > But we should not just be silent about it :-) > > Personally, I'm fine with erroring and telling the user to investigate. > In practice I doubt anyone will really run into this. The catalog shell > is too awkward to have many users (I think???) How about we introduce this log message, then use the question mark operator?     match (did_match, &entry.attr) {         (_, DirEntryAttribute::Directory { .. }) => {             if match_result.is_err() {                 log::error!("error while matching file paths");             }             self.handle_new_directory(entry, match_result?).await?;         }         ...     } >> let did_match = match match_result { >> - Some(MatchType::Include) => true, >> - Some(MatchType::Exclude) => false, >> - None => self.matches, >> + Ok(Some(MatchType::Include)) => true, >> + Ok(Some(MatchType::Exclude)) => false, >> + _ => self.matches, >> }; >> >> match (did_match, &entry.attr) { >> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs >> index 2577cf98..4a844599 100644 >> --- a/pbs-client/src/pxar/create.rs >> +++ b/pbs-client/src/pxar/create.rs >> @@ -21,7 +21,6 @@ use pxar::Metadata; >> >> use proxmox_io::vec; >> use proxmox_lang::c_str; >> -use proxmox_sys::error::SysError; >> use proxmox_sys::fs::{self, acl, xattr}; >> >> use pbs_datastore::catalog::BackupCatalogWriter; >> @@ -420,7 +419,7 @@ impl Archiver { >> for file in dir.iter() { >> let file = file?; >> >> - let file_name = file.file_name().to_owned(); >> + let file_name = file.file_name(); >> let file_name_bytes = file_name.to_bytes(); >> if file_name_bytes == b"." || file_name_bytes == b".." { >> continue; >> @@ -434,25 +433,57 @@ impl Archiver { >> assert_single_path_component(os_file_name)?; >> let full_path = self.path.join(os_file_name); >> >> - let stat = match nix::sys::stat::fstatat( >> - dir_fd, >> - file_name.as_c_str(), >> - nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, >> - ) { >> - Ok(stat) => stat, >> - Err(ref err) if err.not_found() => continue, >> - Err(err) => return Err(err).context(format!("stat failed on {:?}", full_path)), >> - }; >> - >> let match_path = PathBuf::from("/").join(full_path.clone()); >> - if self >> + >> + let mut stat_results: Option = None; > Please factor the duplicate `fstatat` call out up here into a closure > let get_file_mode = || fstatat(...); > >> + >> + let stat = self > ^ stat is not a good name for this. > This can stay an `if` as before since the condition doesn't need to be > this huge. > >> .patterns >> - .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) >> - == Some(MatchType::Exclude) >> - { >> + .matches( >> + match_path.as_os_str().as_bytes(), >> + || match nix::sys::stat::fstatat( >> + dir_fd, >> + file_name.to_owned().as_c_str(), >> + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, >> + ) { >> + Ok(stat) => { >> + stat_results = Some(stat); >> + Ok(stat.st_mode) >> + } >> + Err(e) => Err(e), >> + }, >> + ) > ^ this block would just be > .matches(match_path.as_os_str().as_bytes(), || { > Ok::<_, Errno>(match &stat_results { > Some(result) => result.st_mode, > None => stat_results.insert(get_file_mode()?).st_mode, > }) > }) > > >> + .map_err(|err| { >> + anyhow::Error::new(err).context(format!( >> + "stat failed on {:?} with {:?}", >> + full_path, >> + err.desc() > When switching to 'context', there's no need to include the error in the > message, it just causes duplicate text in the output. > >> + )) >> + })?; > ^ We have both `Error` and `Context` in imported. The entire `.map_err` > can be turned into just: > > .with_context(|| format!("stat failed on {full_path:?}"))? > >> + >> + if stat == Some(MatchType::Exclude) { >> continue; >> } >> >> + if stat_results.is_none() { >> + match nix::sys::stat::fstatat( >> + dir_fd, >> + file_name.to_owned().as_c_str(), >> + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, >> + ) { >> + Ok(stat) => { >> + stat_results = Some(stat); >> + } >> + Err(err) => { >> + return Err(err).context(format!( >> + "stat failed on {:?} with {:?}", >> + full_path, >> + err.desc() >> + )); >> + } >> + } >> + } > ^ Instead of this large block we then only need: > > let stat = stat_results.map(Ok).unwrap_or_else(get_file_mode)?; > >> + >> self.entry_counter += 1; >> if self.entry_counter > self.entry_limit { >> bail!( >> @@ -462,9 +493,9 @@ impl Archiver { >> } >> >> file_list.push(FileListEntry { >> - name: file_name, >> + name: file_name.to_owned(), >> path: full_path, >> - stat, >> + stat: stat_results.unwrap(), > ^ and this change can just be dropped :-) > >> }); >> } >> >> @@ -534,7 +565,7 @@ impl Archiver { >> if self >> .patterns >> .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) >> - == Some(MatchType::Exclude) >> + == Ok(Some(MatchType::Exclude)) > ^ Please don't just ignore errors, just put a `?` onto the call. > > On the one hand we know that this cannot error since we pass `Some`, so > we could even unwrap, but that's worse, since there's no guarantee that > no other errors can happen. > We *may* be able to guarantee this by adding a `GetFileMode` impl > in pathpatterns directly to `u32` with `type Error = > std::convert::Infallible`, and then drop the `Some()` wrapping around > this piece? Yes, that's nice. Adding this implementation to pathpatterns:     impl GetFileMode for u32 {         type Error = std::convert::Infallible;         fn get(self) -> Result {             Ok(self)         }     } then it should look like this:     if self         .patterns         .matches(match_path.as_os_str().as_bytes(), stat.st_mode)?             == Some(MatchType::Exclude)     {         return Ok(());     } >> { >> return Ok(()); >> } >> diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs >> index 4dbaf52d..e08af3c0 100644 >> --- a/pbs-client/src/pxar/extract.rs >> +++ b/pbs-client/src/pxar/extract.rs >> @@ -244,16 +244,17 @@ where >> ); >> >> let did_match = match match_result { >> - Some(MatchType::Include) => true, >> - Some(MatchType::Exclude) => false, >> - None => self.state.current_match, >> + Ok(Some(MatchType::Include)) => true, >> + Ok(Some(MatchType::Exclude)) => false, >> + _ => self.state.current_match, > Same here, 3 lines above we pass `Some(file_type)` to `matches()`. > We should not just use `_` here, but rather, the above should fail. Here we can write this:     let match_result = self.match_list.matches(         entry.path().as_os_str().as_bytes(),         metadata.file_type() as u32     ).unwrap();     let did_match = match match_result {         Some(MatchType::Include) => true,         Some(MatchType::Exclude) => false,         _ => self.state.current_match,     }; Unwrapping should (?) be safe because the `Err` is `Infallible`. >> }; >> >> let extract_res = match (did_match, entry.kind()) { >> (_, EntryKind::Directory) => { >> self.callback(entry.path()); >> >> - let create = self.state.current_match && match_result != Some(MatchType::Exclude); >> + let create = >> + self.state.current_match && match_result != Ok(Some(MatchType::Exclude)); >> let res = self >> .extractor >> .enter_directory(file_name_os.to_owned(), metadata.clone(), create) >> diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs >> index 11c14b64..86e20c92 100644 >> --- a/pbs-datastore/src/catalog.rs >> +++ b/pbs-datastore/src/catalog.rs >> @@ -678,9 +678,9 @@ impl CatalogReader { >> } >> file_path.extend(&e.name); >> match match_list.matches(&file_path, e.get_file_mode()) { >> - Some(MatchType::Exclude) => continue, >> - Some(MatchType::Include) => callback(file_path)?, >> - None => (), >> + Ok(Some(MatchType::Exclude)) => continue, >> + Ok(Some(MatchType::Include)) => callback(file_path)?, >> + _ => (), >> } >> if is_dir { >> self.find(&e, file_path, match_list, callback)?; >> -- >> 2.39.2