From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id A905A8B34 for ; Tue, 22 Aug 2023 16:07:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 92190DFE7 for ; Tue, 22 Aug 2023 16:07:36 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Tue, 22 Aug 2023 16:07:35 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 70D0A43358 for ; Tue, 22 Aug 2023 16:07:35 +0200 (CEST) Message-ID: <445f03cb-bdd7-5828-2256-d6bd89e77daa@proxmox.com> Date: Tue, 22 Aug 2023 16:07:33 +0200 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.14.0 Content-Language: en-US To: Wolfgang Bumiller Cc: pbs-devel@lists.proxmox.com References: <20230821130826.147473-1-g.goller@proxmox.com> <20230821130826.147473-2-g.goller@proxmox.com> From: Gabriel Goller In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-SPAM-LEVEL: Spam detection results: 0 AWL 1.209 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment NICE_REPLY_A -3.374 Looks like a legit reply (A) SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches 
SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v6] fix #4380: check if file is excluded before running `stat()` X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 22 Aug 2023 14:07:36 -0000 Submitted a new version. On 8/22/23 15:04, Wolfgang Bumiller wrote: > On Mon, Aug 21, 2023 at 03:08:26PM +0200, Gabriel Goller wrote: >> Passed a closure with the `stat()` function call to `matches()`. This >> will traverse through all patterns and try to match using the path only, if a >> `file_mode` is needed, it will run the closure. This means that if we exclude >> a file with the `MatchType::ANY_FILE_TYPE`, we will skip it without running >> `stat()` on it. As we updated the `matches()` function, we also updated all the >> invocations of it. >> Added `pathpatterns` crate to local overrides in cargo.toml. >> >> Signed-off-by: Gabriel Goller >> --- >> >> changes v5: >> - updated all invocations of `matches()` >> >> changes v4: >> - match only by path and exclude the matched files, then run `stat()` and >> match again, this time using the `file_mode`. This will match everything >> twice in the worst case, which is not optimal. >> changes v3: >> - checking for `read` and `execute` permissions before entering directory, >> doesn't work because there are a lot of side-effects (executed by >> different user, AppArmor, SELinux, ...). >> changes v2: >> - checking for excluded files with `matches()` before executing `stat()`, >> this doesn't work because we get the file_mode from `stat()` and don't >> want to ignore it when matching. 
>> >> >> Cargo.toml | 5 +++-- >> pbs-client/src/catalog_shell.rs | 8 +++---- >> pbs-client/src/pxar/create.rs | 38 ++++++++++++++++++++------------- >> pbs-client/src/pxar/extract.rs | 10 +++++---- >> pbs-datastore/src/catalog.rs | 6 +++--- >> 5 files changed, 39 insertions(+), 28 deletions(-) >> >> diff --git a/Cargo.toml b/Cargo.toml >> index 5cbae1b8..560794a4 100644 >> --- a/Cargo.toml >> +++ b/Cargo.toml >> @@ -264,8 +264,9 @@ proxmox-rrd.workspace = true >> #proxmox-sortable-macro = { path = "../proxmox/proxmox-sortable-macro" } >> #proxmox-human-byte = { path = "../proxmox/proxmox-human-byte" } >> >> -#proxmox-apt = { path = "../proxmox/proxmox-apt" } >> -#proxmox-openid = { path = "../proxmox/proxmox-openid" } >> +#proxmox-apt = { path = "../proxmox-apt" } >> +#proxmox-openid = { path = "../proxmox-openid-rs" } >> +#pathpatterns = {path = "../pathpatterns" } >> >> #pxar = { path = "../pxar" } >> >> diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs >> index b8aaf8cb..f53b3cc5 100644 >> --- a/pbs-client/src/catalog_shell.rs >> +++ b/pbs-client/src/catalog_shell.rs >> @@ -1138,14 +1138,14 @@ impl<'a> ExtractorState<'a> { >> pub async fn handle_entry(&mut self, entry: catalog::DirEntry) -> Result<(), Error> { >> let match_result = self.match_list.matches(&self.path, entry.get_file_mode()); >> let did_match = match match_result { >> - Some(MatchType::Include) => true, >> - Some(MatchType::Exclude) => false, >> - None => self.matches, >> + Ok(Some(MatchType::Include)) => true, >> + Ok(Some(MatchType::Exclude)) => false, >> + _ => self.matches, >> }; >> >> match (did_match, &entry.attr) { >> (_, DirEntryAttribute::Directory { .. }) => { >> - self.handle_new_directory(entry, match_result).await?; >> + self.handle_new_directory(entry, match_result?).await?; >> } >> (true, DirEntryAttribute::File { .. 
}) => { >> self.dir_stack.push(PathStackEntry::new(entry)); >> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs >> index 2577cf98..2d516cfa 100644 >> --- a/pbs-client/src/pxar/create.rs >> +++ b/pbs-client/src/pxar/create.rs >> @@ -21,7 +21,6 @@ use pxar::Metadata; >> >> use proxmox_io::vec; >> use proxmox_lang::c_str; >> -use proxmox_sys::error::SysError; >> use proxmox_sys::fs::{self, acl, xattr}; >> >> use pbs_datastore::catalog::BackupCatalogWriter; >> @@ -420,7 +419,7 @@ impl Archiver { >> for file in dir.iter() { >> let file = file?; >> >> - let file_name = file.file_name().to_owned(); >> + let file_name = file.file_name(); >> let file_name_bytes = file_name.to_bytes(); >> if file_name_bytes == b"." || file_name_bytes == b".." { >> continue; >> @@ -434,25 +433,34 @@ impl Archiver { >> assert_single_path_component(os_file_name)?; >> let full_path = self.path.join(os_file_name); >> >> - let stat = match nix::sys::stat::fstatat( >> + let match_path = PathBuf::from("/").join(full_path.clone()); >> + >> + let mut stat_results: Option = None; >> + >> + let get_file_mode = || match nix::sys::stat::fstatat( > You don't need that 'match' here if your cases are basically a no-op. > > You *could* attach the `stat failed on ...` context with the file name > here... but this will make it a bit more tedious to check for `ENOENT` > as we'd need to use `.downcast()` on the anyhow::Error. > > So either that, or we'll need to duplicate the context line down below > where we do the final `let stat = results.unwrap_or_else(get_file_mode)?`. 
> >> dir_fd, >> - file_name.as_c_str(), >> + file_name.to_owned().as_c_str(), >> nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW, >> ) { >> - Ok(stat) => stat, >> - Err(ref err) if err.not_found() => continue, > Because the thing is, we still need to handle this case - just noticed > that this was removed entirely, which is of course not what we want :-) > If the file gets removed between listing it from the directory and us > trying to `stat` it, we should just act as if it had never existed. > >> - Err(err) => return Err(err).context(format!("stat failed on {:?}", full_path)), >> + Ok(stat) => Ok(stat), >> + Err(e) => Err(e), >> }; >> - >> - let match_path = PathBuf::from("/").join(full_path.clone()); >> - if self >> - .patterns >> - .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) >> - == Some(MatchType::Exclude) >> + if Some(MatchType::Exclude) >> + == self >> + .patterns >> + .matches(match_path.as_os_str().as_bytes(), || { >> + Ok::<_, Errno>(match &stat_results { >> + Some(result) => result.st_mode, >> + None => stat_results.insert(get_file_mode()?).st_mode, >> + }) >> + }) > ... it will instead need to be checked here ^ > Basically, we will need to swap the `if` for a `match` after all, with > the cases > Ok(Some(MatchType::Exclude)) => continue, > Ok(_) => (), > Err(err) if err.not_found() => continue, > err => return err.with_context(...), > or the final 2 cases when *not* changing the `get_file_mode` > error to `anyhow::Error` would be: > Err(err) => match err.downcast::<Errno>() { > Some(err) if err.not_found() => continue, > _ => return Err(err), > } > > > I hope I didn't miss anything now. > >> + .with_context(|| format!("stat failed on {full_path:?}"))? >> { >> continue; >> } >> >> + let stat = stat_results.map(Ok).unwrap_or_else(get_file_mode)?; > If the context line is not already in the closure, we'd need to > duplicate it here. 
> >> + >> self.entry_counter += 1; >> if self.entry_counter > self.entry_limit { >> bail!( >> @@ -462,7 +470,7 @@ impl Archiver { >> } >> >> file_list.push(FileListEntry { >> - name: file_name, >> + name: file_name.to_owned(), >> path: full_path, >> stat, >> }); >> @@ -533,7 +541,7 @@ impl Archiver { >> let match_path = PathBuf::from("/").join(self.path.clone()); >> if self >> .patterns >> - .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode)) >> + .matches(match_path.as_os_str().as_bytes(), stat.st_mode)? >> == Some(MatchType::Exclude) >> { >> return Ok(()); >> diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs >> index 4eb6fb90..e24a1560 100644 >> --- a/pbs-client/src/pxar/extract.rs >> +++ b/pbs-client/src/pxar/extract.rs >> @@ -251,22 +251,24 @@ where >> >> self.extractor.set_path(entry.path().as_os_str().to_owned()); >> >> + // We can `unwrap()` safely here because we get a `Result<_, std::convert::Infallible>` >> let match_result = self.match_list.matches( >> entry.path().as_os_str().as_bytes(), >> - Some(metadata.file_type() as u32), >> - ); >> + metadata.file_type() as u32 >> + ).unwrap(); >> >> let did_match = match match_result { >> Some(MatchType::Include) => true, >> Some(MatchType::Exclude) => false, >> - None => self.state.current_match, >> + _ => self.state.current_match, >> }; >> >> let extract_res = match (did_match, entry.kind()) { >> (_, EntryKind::Directory) => { >> self.callback(entry.path()); >> >> - let create = self.state.current_match && match_result != Some(MatchType::Exclude); >> + let create = >> + self.state.current_match && match_result != Some(MatchType::Exclude); >> let res = self >> .extractor >> .enter_directory(file_name_os.to_owned(), metadata.clone(), create) >> diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs >> index 11c14b64..86e20c92 100644 >> --- a/pbs-datastore/src/catalog.rs >> +++ b/pbs-datastore/src/catalog.rs >> @@ -678,9 +678,9 @@ impl CatalogReader { >> } 
>> file_path.extend(&e.name); >> match match_list.matches(&file_path, e.get_file_mode()) { >> - Some(MatchType::Exclude) => continue, >> - Some(MatchType::Include) => callback(file_path)?, >> - None => (), >> + Ok(Some(MatchType::Exclude)) => continue, >> + Ok(Some(MatchType::Include)) => callback(file_path)?, >> + _ => (),