From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 8BC38CDF1 for ; Tue, 11 Jul 2023 15:24:51 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 65C5723B69 for ; Tue, 11 Jul 2023 15:24:21 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Tue, 11 Jul 2023 15:24:19 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 998FB42E77 for ; Tue, 11 Jul 2023 15:24:19 +0200 (CEST) Date: Tue, 11 Jul 2023 15:24:17 +0200 From: Wolfgang Bumiller To: Max Carrara Cc: pbs-devel@lists.proxmox.com Message-ID: References: <20230607163428.1154123-1-m.carrara@proxmox.com> <20230607163428.1154123-3-m.carrara@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20230607163428.1154123-3-m.carrara@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.121 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: Re: [pbs-devel] [PATCH proxmox-backup 2/5] pbs-client: pxar: refactor body of `extract_archive` to `ExtractorIter` X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 11 Jul 2023 13:24:51 -0000 On Wed, Jun 07, 2023 at 06:34:25PM +0200, Max Carrara wrote: > This change factors the body of `extract_archive()` into a separate > struct named `ExtractorIter` which implements the `Iterator` trait. > > This refactor has two goals: > * Make it easier to provide and propagate errors and additional > information via `anyhow::Context` > * Introduce a means to handle errors that occur during extraction, > with the possibility to continue extraction if the handler decides > that the error is not fatal Does it really help much with that both those things? Which parts actually benefit from this? > > The latter point benefits from the information provided by the former; > previously, errors could only be handled in certain locations > (e.g. application of metadata), but not on a "per-entry" basis. > > Since `extract_archive()` was already using a "desugared" version of > the iterator pattern to begin with, wrapping its body up in an actual > `Iterator` made the most sense, as it didn't require changing the already > existing control flow that much. > > Signed-off-by: Max Carrara > --- > pbs-client/src/pxar/extract.rs | 382 +++++++++++++++++++++++---------- > 1 file changed, 273 insertions(+), 109 deletions(-) > > diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs > index 7af80bb9..fa08bfd7 100644 > --- a/pbs-client/src/pxar/extract.rs > +++ b/pbs-client/src/pxar/extract.rs > @@ -8,7 +8,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; > use std::path::{Path, PathBuf}; > use std::sync::{Arc, Mutex}; > > -use anyhow::{bail, Context, Error}; > +use anyhow::{anyhow, bail, Context, Error}; > use nix::dir::Dir; > use nix::fcntl::OFlag; > use nix::sys::stat::Mode; > @@ -40,75 +40,224 @@ pub struct PxarExtractOptions<'a> { > pub type ErrorHandler = Box Result<(), Error> + Send>; > > pub fn extract_archive( > - mut decoder: pxar::decoder::Decoder, > + decoder: pxar::decoder::Decoder, > destination: &Path, > feature_flags: Flags, > - mut callback: F, > + callback: F, > options: PxarExtractOptions, > ) -> Result<(), Error> > where > T: pxar::decoder::SeqRead, > F: FnMut(&Path), > { > - // we use this to keep track of our directory-traversal > - decoder.enable_goodbye_entries(true); > + ExtractorIter::new(decoder, destination, feature_flags, callback, options) > + .context("failed to initialize extractor")? > + .collect::>() > + .context("encountered unexpected error during extraction") > +} > > - let root = decoder > - .next() > - .context("found empty pxar archive")? > - .context("error reading pxar archive")?; > +struct ExtractorIterState { > + match_stack: Vec, > + err_path_stack: Vec, > + current_match: bool, > + end_reached: bool, > +} > > - if !root.is_dir() { > - bail!("pxar archive does not start with a directory entry!"); > +/// An [`Iterator`] that encapsulates the process of extraction in [extract_archive]. > +/// Therefore, traversing over an [`ExtractorIter`] until exhaustion extracts an > +/// entire PXAR archive. > +struct ExtractorIter<'a, T, F> > +where > + T: pxar::decoder::SeqRead, > + F: FnMut(&Path), > +{ > + decoder: pxar::decoder::Decoder, > + callback: F, > + extractor: Extractor, > + match_list: &'a [MatchEntry], > + state: ExtractorIterState, > +} > + > +impl ExtractorIterState { > + fn new(options: &PxarExtractOptions) -> Self { > + Self { > + match_stack: Vec::new(), > + err_path_stack: Vec::new(), > + current_match: options.extract_match_default, > + end_reached: false, > + } > } > +} > > - create_path( > - destination, > - None, > - Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), > - ) > - .context(format!("error creating directory {:?}", destination))?; > +impl<'a, T, F> ExtractorIter<'a, T, F> > +where > + T: pxar::decoder::SeqRead, > + F: FnMut(&Path), > +{ > + /// Creates and initializes the state of a new [`ExtractorIter`]. > + /// > + /// This function requires that the given [`Decoder`][D] has not made a single > + /// traversal (a call to [`next()`][next]) yet. > + /// > + /// [D]: pxar::decoder::Decoder > + /// [next]: std::iter::Iterator::next() > + fn new( > + mut decoder: pxar::decoder::Decoder, > + destination: &Path, > + feature_flags: Flags, > + callback: F, > + options: PxarExtractOptions<'a>, > + ) -> Result { > + // we use this to keep track of our directory-traversal > + decoder.enable_goodbye_entries(true); > + > + let root = decoder > + .next() > + .context("found empty pxar archive")? > + .context("error reading pxar archive")?; > + > + if !root.is_dir() { > + bail!("pxar archive does not start with a directory entry!"); > + } > > - let dir = Dir::open( > - destination, > - OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, > - Mode::empty(), > - ) > - .context(format!("unable to open target directory {:?}", destination))?; > - > - let mut extractor = Extractor::new( > - dir, > - root.metadata().clone(), > - options.allow_existing_dirs, > - options.overwrite, > - feature_flags, > - ); > - > - if let Some(on_error) = options.on_error { > - extractor.on_error(on_error); > + let mut state = ExtractorIterState::new(&options); > + state.err_path_stack.push(OsString::from("/")); > + > + create_path( > + destination, > + None, > + Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), > + ) > + .context(format!("error creating directory {destination:?}"))?; > + > + let dir = Dir::open( > + destination, > + OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, > + Mode::empty(), > + ) > + .context(format!("unable to open target directory {destination:?}"))?; > + > + let mut extractor = Extractor::new( > + dir, > + root.metadata().clone(), > + options.allow_existing_dirs, > + options.overwrite, > + feature_flags, > + ); > + > + if let Some(on_error) = options.on_error { > + extractor.on_error(on_error); > + } > + > + Ok(Self { > + decoder, > + callback, > + extractor, > + match_list: options.match_list, > + state, > + }) > } > > - let mut match_stack = Vec::new(); > - let mut err_path_stack = vec![OsString::from("/")]; > - let mut current_match = options.extract_match_default; > - while let Some(entry) = decoder.next() { > - let entry = entry.context("error reading pxar archive")?; > + #[inline(always)] > + fn callback(&mut self, path: &Path) { > + (self.callback)(path) > + } > > - let file_name_os = entry.file_name(); > + /// Yields the [`Decoder`][D]'s [`Result`]s and validates each [`Entry`][E] > + /// it encounters. It also controls the [`ExtractorIter`]'s stop condition. > + /// > + /// In detail, the [`ExtractorIter`] will stop if and only if one of the > + /// following conditions is true: > + /// * The [`Decoder`][D] is exhausted > + /// * The [`Decoder`][D] failed to read from the archive and consequently > + /// yielded an [`io::Error`] > + /// * Validation of an [`Entry`][E] failed > + /// > + /// The iterator will yield the encountered error before stopping. > + /// > + /// [D]: pxar::decoder::Decoder > + /// [E]: pxar::Entry > + #[inline] > + fn next_entry(&mut self) -> Option> { > + if self.state.end_reached { > + return None; > + } > > - // safety check: a file entry in an archive must never contain slashes: > - if file_name_os.as_bytes().contains(&b'/') { > - bail!("archive file entry contains slashes, which is invalid and a security concern"); > + let entry = self.decoder.next(); > + > + if entry.is_none() { ^ This could be a diverging `match` to avoid the `unwrap` below. let entry = match entry { None => { .. the code from here .. } Some(entry) => entry, }; (side note: too much code for a `let else` to be formatted properly) > + self.state.end_reached = true; > + if !self.extractor.dir_stack.is_empty() { > + return Some(Err(anyhow!("unexpected eof while decoding pxar archive"))); style: we typically use `format_err!`, I don't think we should start mixing in `anyhow!()` unless there's a good reason for it. > + } else { > + return None; > + } > } > > - let file_name = CString::new(file_name_os.as_bytes()) > - .context("encountered file name with null-bytes")?; > + #[inline(always)] > + fn validate_entry(entry: Result) -> Result { > + let entry = entry.context("error reading pxar archive")?; > + > + let file_name_bytes = entry.file_name().as_bytes(); > + > + if file_name_bytes.contains(&b'/') { > + bail!( > + "archive file entry contains slashes, which is invalid and a security concern" > + ); > + } > + > + CString::new(file_name_bytes).context("encountered file name with null-bytes")?; ^ I'd rather drop this here, because this allocates for no reason other than to check for errors, both of which happens again later on. As a side note, do we even need a `CString`? Or can the whole code using `file_name` below work on a `CStr` as well? If you insist on checking here - at least use `CStr::from_bytes_with_nul` here - add a comment about the code below - use an `unsafe{}` block with a comment below with `CStr::from_bytes_with_nul_unchecked` But only checking later makes a lot more sense to me :-) > + > + Ok(entry) > + } > + > + let entry = validate_entry(entry.unwrap()); > + > + if entry.is_err() { > + self.state.end_reached = true; > + } > > + Some(entry) > + } > +} > + > +impl<'a, T, F> Iterator for ExtractorIter<'a, T, F> > +where > + T: pxar::decoder::SeqRead, > + F: FnMut(&Path), > +{ > + type Item = Result<(), Error>; > + > + /// Performs the actual extraction of decoded [`Entries`][E]. > + /// > + /// Should an error occur during any point of extraction (**not** while > + /// fetching the next [`Entry`][E]), the error may be handled by the > + /// [`ErrorHandler`] provided by the [`PxarExtractOptions`] used to > + /// initialize the iterator. > + /// > + /// [E]: pxar::Entry > + fn next(&mut self) -> Option { > + let entry = self.next_entry()?; > + > + if entry.is_err() { > + assert!( > + self.state.end_reached, > + "encountered an error while reading the archive, but the iterator was not stopped!" > + ); > + > + return Some(entry.map(drop)); > + } > + > + let entry = entry.unwrap(); ^ Same as in `next_entry` - we can easily get rid of the `unwrap` with a `match`. > + let file_name_os = entry.file_name(); > let metadata = entry.metadata(); > > - extractor.set_path(entry.path().as_os_str().to_owned()); > + // can unwrap here because next_entry already checked for null bytes > + let file_name = CString::new(file_name_os.as_bytes()).unwrap(); > + > + self.extractor.set_path(entry.path().as_os_str().to_owned()); > > - let match_result = options.match_list.matches( > + let match_result = self.match_list.matches( > entry.path().as_os_str().as_bytes(), > Some(metadata.file_type() as u32), > ); > @@ -116,99 +265,114 @@ where > let did_match = match match_result { > Some(MatchType::Include) => true, > Some(MatchType::Exclude) => false, > - None => current_match, > + None => self.state.current_match, > }; > - match (did_match, entry.kind()) { > - (_, EntryKind::Directory) => { > - callback(entry.path()); > - > - let create = current_match && match_result != Some(MatchType::Exclude); > - extractor > - .enter_directory(file_name_os.to_owned(), metadata.clone(), create) > - .context(format!("error at entry {:?}", file_name_os))?; > > - // We're starting a new directory, push our old matching state and replace it with > - // our new one: > - match_stack.push(current_match); > - current_match = did_match; > - > - // When we hit the goodbye table we'll try to apply metadata to the directory, but > - // the Goodbye entry will not contain the path, so push it to our path stack for > - // error messages: > - err_path_stack.push(extractor.clone_path()); > + let extract_res = match (did_match, entry.kind()) { > + (_, EntryKind::Directory) => { > + self.callback(entry.path()); > + > + let create = self.state.current_match && match_result != Some(MatchType::Exclude); > + let res = self.extractor.enter_directory( > + file_name_os.to_owned(), > + metadata.clone(), > + create, > + ); > + > + if res.is_ok() { > + // We're starting a new directory, push our old matching state and replace it with > + // our new one: > + self.state.match_stack.push(self.state.current_match); > + self.state.current_match = did_match; > + > + // When we hit the goodbye table we'll try to apply metadata to the directory, but > + // the Goodbye entry will not contain the path, so push it to our path stack for > + // error messages: > + self.state.err_path_stack.push(self.extractor.clone_path()); > + } > > - Ok(()) > + res > } > (_, EntryKind::GoodbyeTable) => { > // go up a directory > > - extractor.set_path(err_path_stack.pop().context(format!( > - "error at entry {:?} - unexpected end of directory", > - file_name_os > - ))?); > - > - extractor > - .leave_directory() > - .context(format!("error at entry {:?}", file_name_os))?; > - > - // We left a directory, also get back our previous matching state. This is in sync > - // with `dir_stack` so this should never be empty except for the final goodbye > - // table, in which case we get back to the default of `true`. > - current_match = match_stack.pop().unwrap_or(true); > + let res = self > + .state > + .err_path_stack > + .pop() > + .context("unexpected end of directory") > + .map(|path| self.extractor.set_path(path)) > + .and(self.extractor.leave_directory()); > + > + if res.is_ok() { > + // We left a directory, also get back our previous matching state. This is in sync > + // with `dir_stack` so this should never be empty except for the final goodbye > + // table, in which case we get back to the default of `true`. > + self.state.current_match = self.state.match_stack.pop().unwrap_or(true); > + } > > - Ok(()) > + res > } > (true, EntryKind::Symlink(link)) => { > - callback(entry.path()); > - extractor.extract_symlink(&file_name, metadata, link.as_ref()) > + self.callback(entry.path()); > + self.extractor > + .extract_symlink(&file_name, metadata, link.as_ref()) > } > (true, EntryKind::Hardlink(link)) => { > - callback(entry.path()); > - extractor.extract_hardlink(&file_name, link.as_os_str()) > + self.callback(entry.path()); > + self.extractor > + .extract_hardlink(&file_name, link.as_os_str()) > } > (true, EntryKind::Device(dev)) => { > - if extractor.contains_flags(Flags::WITH_DEVICE_NODES) { > - callback(entry.path()); > - extractor.extract_device(&file_name, metadata, dev) > + if self.extractor.contains_flags(Flags::WITH_DEVICE_NODES) { > + self.callback(entry.path()); > + self.extractor.extract_device(&file_name, metadata, dev) > } else { > Ok(()) > } > } > (true, EntryKind::Fifo) => { > - if extractor.contains_flags(Flags::WITH_FIFOS) { > - callback(entry.path()); > - extractor.extract_special(&file_name, metadata, 0) > + if self.extractor.contains_flags(Flags::WITH_FIFOS) { > + self.callback(entry.path()); > + self.extractor.extract_special(&file_name, metadata, 0) > } else { > Ok(()) > } > } > (true, EntryKind::Socket) => { > - if extractor.contains_flags(Flags::WITH_SOCKETS) { > - callback(entry.path()); > - extractor.extract_special(&file_name, metadata, 0) > + if self.extractor.contains_flags(Flags::WITH_SOCKETS) { > + self.callback(entry.path()); > + self.extractor.extract_special(&file_name, metadata, 0) > } else { > Ok(()) > } > } > - (true, EntryKind::File { size, .. }) => extractor.extract_file( > - &file_name, > - metadata, > - *size, > - &mut decoder > - .contents() > - .context("found regular file entry without contents in archive")?, > - extractor.overwrite, > - ), > + (true, EntryKind::File { size, .. }) => { > + let contents = self.decoder.contents(); > + > + if let Some(mut contents) = contents { > + self.extractor.extract_file( > + &file_name, > + metadata, > + *size, > + &mut contents, > + self.extractor.overwrite, > + ) > + } else { > + Err(anyhow!( > + "found regular file entry without contents in archive" > + )) > + } > + } > (false, _) => Ok(()), // skip this > - } > - .context(format!("error at entry {:?}", file_name_os))?; > - } > + }; > > - if !extractor.dir_stack.is_empty() { > - bail!("unexpected eof while decoding pxar archive"); > + Some( > + extract_res > + .context(format!("error at entry {file_name_os:?}")) > + .or_else(&mut *self.extractor.on_error), > + ) > } > - > - Ok(()) > } > > /// Common state for file extraction. > -- > 2.30.2