From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 335179F1E6 for ; Wed, 7 Jun 2023 18:35:08 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 15E291CFEB for ; Wed, 7 Jun 2023 18:34:38 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 7 Jun 2023 18:34:36 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id CE65A41F8E for ; Wed, 7 Jun 2023 18:34:35 +0200 (CEST) From: Max Carrara To: pbs-devel@lists.proxmox.com Date: Wed, 7 Jun 2023 18:34:25 +0200 Message-Id: <20230607163428.1154123-3-m.carrara@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20230607163428.1154123-1-m.carrara@proxmox.com> References: <20230607163428.1154123-1-m.carrara@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.011 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH proxmox-backup 2/5] pbs-client: pxar: refactor body of `extract_archive` to `ExtractorIter` X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 07 Jun 2023 16:35:08 -0000 This change factors the body of `extract_archive()` into a separate struct named `ExtractorIter` which implements the `Iterator` trait. This refactor has two goals: * Make it easier to provide and propagate errors and additional information via `anyhow::Context` * Introduce a means to handle errors that occur during extraction, with the possibility to continue extraction if the handler decides that the error is not fatal The latter point benefits from the information provided by the former; previously, errors could only be handled in certain locations (e.g. application of metadata), but not on a "per-entry" basis. Since `extract_archive()` was already using a "desugared" version of the iterator pattern to begin with, wrapping its body up in an actual `Iterator` made the most sense, as it didn't require changing the already existing control flow that much. Signed-off-by: Max Carrara --- pbs-client/src/pxar/extract.rs | 382 +++++++++++++++++++++++---------- 1 file changed, 273 insertions(+), 109 deletions(-) diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs index 7af80bb9..fa08bfd7 100644 --- a/pbs-client/src/pxar/extract.rs +++ b/pbs-client/src/pxar/extract.rs @@ -8,7 +8,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -use anyhow::{bail, Context, Error}; +use anyhow::{anyhow, bail, Context, Error}; use nix::dir::Dir; use nix::fcntl::OFlag; use nix::sys::stat::Mode; @@ -40,75 +40,224 @@ pub struct PxarExtractOptions<'a> { pub type ErrorHandler = Box Result<(), Error> + Send>; pub fn extract_archive( - mut decoder: pxar::decoder::Decoder, + decoder: pxar::decoder::Decoder, destination: &Path, feature_flags: Flags, - mut callback: F, + callback: F, options: PxarExtractOptions, ) -> Result<(), Error> where T: pxar::decoder::SeqRead, F: FnMut(&Path), { - // we use this to keep track of our directory-traversal - decoder.enable_goodbye_entries(true); + ExtractorIter::new(decoder, destination, feature_flags, callback, options) + .context("failed to initialize extractor")? + .collect::>() + .context("encountered unexpected error during extraction") +} - let root = decoder - .next() - .context("found empty pxar archive")? - .context("error reading pxar archive")?; +struct ExtractorIterState { + match_stack: Vec, + err_path_stack: Vec, + current_match: bool, + end_reached: bool, +} - if !root.is_dir() { - bail!("pxar archive does not start with a directory entry!"); +/// An [`Iterator`] that encapsulates the process of extraction in [extract_archive]. +/// Therefore, traversing over an [`ExtractorIter`] until exhaustion extracts an +/// entire PXAR archive. +struct ExtractorIter<'a, T, F> +where + T: pxar::decoder::SeqRead, + F: FnMut(&Path), +{ + decoder: pxar::decoder::Decoder, + callback: F, + extractor: Extractor, + match_list: &'a [MatchEntry], + state: ExtractorIterState, +} + +impl ExtractorIterState { + fn new(options: &PxarExtractOptions) -> Self { + Self { + match_stack: Vec::new(), + err_path_stack: Vec::new(), + current_match: options.extract_match_default, + end_reached: false, + } } +} - create_path( - destination, - None, - Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), - ) - .context(format!("error creating directory {:?}", destination))?; +impl<'a, T, F> ExtractorIter<'a, T, F> +where + T: pxar::decoder::SeqRead, + F: FnMut(&Path), +{ + /// Creates and initializes the state of a new [`ExtractorIter`]. + /// + /// This function requires that the given [`Decoder`][D] has not made a single + /// traversal (a call to [`next()`][next]) yet. + /// + /// [D]: pxar::decoder::Decoder + /// [next]: std::iter::Iterator::next() + fn new( + mut decoder: pxar::decoder::Decoder, + destination: &Path, + feature_flags: Flags, + callback: F, + options: PxarExtractOptions<'a>, + ) -> Result { + // we use this to keep track of our directory-traversal + decoder.enable_goodbye_entries(true); + + let root = decoder + .next() + .context("found empty pxar archive")? + .context("error reading pxar archive")?; + + if !root.is_dir() { + bail!("pxar archive does not start with a directory entry!"); + } - let dir = Dir::open( - destination, - OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, - Mode::empty(), - ) - .context(format!("unable to open target directory {:?}", destination))?; - - let mut extractor = Extractor::new( - dir, - root.metadata().clone(), - options.allow_existing_dirs, - options.overwrite, - feature_flags, - ); - - if let Some(on_error) = options.on_error { - extractor.on_error(on_error); + let mut state = ExtractorIterState::new(&options); + state.err_path_stack.push(OsString::from("/")); + + create_path( + destination, + None, + Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), + ) + .context(format!("error creating directory {destination:?}"))?; + + let dir = Dir::open( + destination, + OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, + Mode::empty(), + ) + .context(format!("unable to open target directory {destination:?}"))?; + + let mut extractor = Extractor::new( + dir, + root.metadata().clone(), + options.allow_existing_dirs, + options.overwrite, + feature_flags, + ); + + if let Some(on_error) = options.on_error { + extractor.on_error(on_error); + } + + Ok(Self { + decoder, + callback, + extractor, + match_list: options.match_list, + state, + }) } - let mut match_stack = Vec::new(); - let mut err_path_stack = vec![OsString::from("/")]; - let mut current_match = options.extract_match_default; - while let Some(entry) = decoder.next() { - let entry = entry.context("error reading pxar archive")?; + #[inline(always)] + fn callback(&mut self, path: &Path) { + (self.callback)(path) + } - let file_name_os = entry.file_name(); + /// Yields the [`Decoder`][D]'s [`Result`]s and validates each [`Entry`][E] + /// it encounters. It also controls the [`ExtractorIter`]'s stop condition. + /// + /// In detail, the [`ExtractorIter`] will stop if and only if one of the + /// following conditions is true: + /// * The [`Decoder`][D] is exhausted + /// * The [`Decoder`][D] failed to read from the archive and consequently + /// yielded an [`io::Error`] + /// * Validation of an [`Entry`][E] failed + /// + /// The iterator will yield the encountered error before stopping. + /// + /// [D]: pxar::decoder::Decoder + /// [E]: pxar::Entry + #[inline] + fn next_entry(&mut self) -> Option> { + if self.state.end_reached { + return None; + } - // safety check: a file entry in an archive must never contain slashes: - if file_name_os.as_bytes().contains(&b'/') { - bail!("archive file entry contains slashes, which is invalid and a security concern"); + let entry = self.decoder.next(); + + if entry.is_none() { + self.state.end_reached = true; + if !self.extractor.dir_stack.is_empty() { + return Some(Err(anyhow!("unexpected eof while decoding pxar archive"))); + } else { + return None; + } } - let file_name = CString::new(file_name_os.as_bytes()) - .context("encountered file name with null-bytes")?; + #[inline(always)] + fn validate_entry(entry: Result) -> Result { + let entry = entry.context("error reading pxar archive")?; + + let file_name_bytes = entry.file_name().as_bytes(); + + if file_name_bytes.contains(&b'/') { + bail!( + "archive file entry contains slashes, which is invalid and a security concern" + ); + } + + CString::new(file_name_bytes).context("encountered file name with null-bytes")?; + + Ok(entry) + } + + let entry = validate_entry(entry.unwrap()); + + if entry.is_err() { + self.state.end_reached = true; + } + Some(entry) + } +} + +impl<'a, T, F> Iterator for ExtractorIter<'a, T, F> +where + T: pxar::decoder::SeqRead, + F: FnMut(&Path), +{ + type Item = Result<(), Error>; + + /// Performs the actual extraction of decoded [`Entries`][E]. + /// + /// Should an error occur during any point of extraction (**not** while + /// fetching the next [`Entry`][E]), the error may be handled by the + /// [`ErrorHandler`] provided by the [`PxarExtractOptions`] used to + /// initialize the iterator. + /// + /// [E]: pxar::Entry + fn next(&mut self) -> Option { + let entry = self.next_entry()?; + + if entry.is_err() { + assert!( + self.state.end_reached, + "encountered an error while reading the archive, but the iterator was not stopped!" + ); + + return Some(entry.map(drop)); + } + + let entry = entry.unwrap(); + let file_name_os = entry.file_name(); let metadata = entry.metadata(); - extractor.set_path(entry.path().as_os_str().to_owned()); + // can unwrap here because next_entry already checked for null bytes + let file_name = CString::new(file_name_os.as_bytes()).unwrap(); + + self.extractor.set_path(entry.path().as_os_str().to_owned()); - let match_result = options.match_list.matches( + let match_result = self.match_list.matches( entry.path().as_os_str().as_bytes(), Some(metadata.file_type() as u32), ); @@ -116,99 +265,114 @@ where let did_match = match match_result { Some(MatchType::Include) => true, Some(MatchType::Exclude) => false, - None => current_match, + None => self.state.current_match, }; - match (did_match, entry.kind()) { - (_, EntryKind::Directory) => { - callback(entry.path()); - - let create = current_match && match_result != Some(MatchType::Exclude); - extractor - .enter_directory(file_name_os.to_owned(), metadata.clone(), create) - .context(format!("error at entry {:?}", file_name_os))?; - // We're starting a new directory, push our old matching state and replace it with - // our new one: - match_stack.push(current_match); - current_match = did_match; - - // When we hit the goodbye table we'll try to apply metadata to the directory, but - // the Goodbye entry will not contain the path, so push it to our path stack for - // error messages: - err_path_stack.push(extractor.clone_path()); + let extract_res = match (did_match, entry.kind()) { + (_, EntryKind::Directory) => { + self.callback(entry.path()); + + let create = self.state.current_match && match_result != Some(MatchType::Exclude); + let res = self.extractor.enter_directory( + file_name_os.to_owned(), + metadata.clone(), + create, + ); + + if res.is_ok() { + // We're starting a new directory, push our old matching state and replace it with + // our new one: + self.state.match_stack.push(self.state.current_match); + self.state.current_match = did_match; + + // When we hit the goodbye table we'll try to apply metadata to the directory, but + // the Goodbye entry will not contain the path, so push it to our path stack for + // error messages: + self.state.err_path_stack.push(self.extractor.clone_path()); + } - Ok(()) + res } (_, EntryKind::GoodbyeTable) => { // go up a directory - extractor.set_path(err_path_stack.pop().context(format!( - "error at entry {:?} - unexpected end of directory", - file_name_os - ))?); - - extractor - .leave_directory() - .context(format!("error at entry {:?}", file_name_os))?; - - // We left a directory, also get back our previous matching state. This is in sync - // with `dir_stack` so this should never be empty except for the final goodbye - // table, in which case we get back to the default of `true`. - current_match = match_stack.pop().unwrap_or(true); + let res = self + .state + .err_path_stack + .pop() + .context("unexpected end of directory") + .map(|path| self.extractor.set_path(path)) + .and(self.extractor.leave_directory()); + + if res.is_ok() { + // We left a directory, also get back our previous matching state. This is in sync + // with `dir_stack` so this should never be empty except for the final goodbye + // table, in which case we get back to the default of `true`. + self.state.current_match = self.state.match_stack.pop().unwrap_or(true); + } - Ok(()) + res } (true, EntryKind::Symlink(link)) => { - callback(entry.path()); - extractor.extract_symlink(&file_name, metadata, link.as_ref()) + self.callback(entry.path()); + self.extractor + .extract_symlink(&file_name, metadata, link.as_ref()) } (true, EntryKind::Hardlink(link)) => { - callback(entry.path()); - extractor.extract_hardlink(&file_name, link.as_os_str()) + self.callback(entry.path()); + self.extractor + .extract_hardlink(&file_name, link.as_os_str()) } (true, EntryKind::Device(dev)) => { - if extractor.contains_flags(Flags::WITH_DEVICE_NODES) { - callback(entry.path()); - extractor.extract_device(&file_name, metadata, dev) + if self.extractor.contains_flags(Flags::WITH_DEVICE_NODES) { + self.callback(entry.path()); + self.extractor.extract_device(&file_name, metadata, dev) } else { Ok(()) } } (true, EntryKind::Fifo) => { - if extractor.contains_flags(Flags::WITH_FIFOS) { - callback(entry.path()); - extractor.extract_special(&file_name, metadata, 0) + if self.extractor.contains_flags(Flags::WITH_FIFOS) { + self.callback(entry.path()); + self.extractor.extract_special(&file_name, metadata, 0) } else { Ok(()) } } (true, EntryKind::Socket) => { - if extractor.contains_flags(Flags::WITH_SOCKETS) { - callback(entry.path()); - extractor.extract_special(&file_name, metadata, 0) + if self.extractor.contains_flags(Flags::WITH_SOCKETS) { + self.callback(entry.path()); + self.extractor.extract_special(&file_name, metadata, 0) } else { Ok(()) } } - (true, EntryKind::File { size, .. }) => extractor.extract_file( - &file_name, - metadata, - *size, - &mut decoder - .contents() - .context("found regular file entry without contents in archive")?, - extractor.overwrite, - ), + (true, EntryKind::File { size, .. }) => { + let contents = self.decoder.contents(); + + if let Some(mut contents) = contents { + self.extractor.extract_file( + &file_name, + metadata, + *size, + &mut contents, + self.extractor.overwrite, + ) + } else { + Err(anyhow!( + "found regular file entry without contents in archive" + )) + } + } (false, _) => Ok(()), // skip this - } - .context(format!("error at entry {:?}", file_name_os))?; - } + }; - if !extractor.dir_stack.is_empty() { - bail!("unexpected eof while decoding pxar archive"); + Some( + extract_res + .context(format!("error at entry {file_name_os:?}")) + .or_else(&mut *self.extractor.on_error), + ) } - - Ok(()) } /// Common state for file extraction. -- 2.30.2