From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 17B8C6D076 for ; Wed, 31 Mar 2021 12:23:09 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 0CFE8DFF4 for ; Wed, 31 Mar 2021 12:22:39 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 42CECDEC6 for ; Wed, 31 Mar 2021 12:22:28 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 12FC241DE8 for ; Wed, 31 Mar 2021 12:22:28 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Wed, 31 Mar 2021 12:22:00 +0200 Message-Id: <20210331102202.14767-19-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20210331102202.14767-1-s.reiter@proxmox.com> References: <20210331102202.14767-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.019 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[mod.rs, extract.rs] Subject: [pbs-devel] [PATCH v3 proxmox-backup 18/20] pxar/extract: add sequential variant of extract_sub_dir X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 31 Mar 2021 10:23:09 -0000 extract_sub_dir_seq, together with seq_files_extractor, allow extracting files from a pxar Decoder, along with the existing option for an Accessor. To facilitate code re-use, some helper functions are extracted in the process. Signed-off-by: Stefan Reiter --- v3: * basically a do-over, no more bogus types src/pxar/extract.rs | 316 ++++++++++++++++++++++++++++++-------------- src/pxar/mod.rs | 5 +- 2 files changed, 224 insertions(+), 97 deletions(-) diff --git a/src/pxar/extract.rs b/src/pxar/extract.rs index 952e2d20..8f85c441 100644 --- a/src/pxar/extract.rs +++ b/src/pxar/extract.rs @@ -16,9 +16,10 @@ use nix::fcntl::OFlag; use nix::sys::stat::Mode; use pathpatterns::{MatchEntry, MatchList, MatchType}; -use pxar::format::Device; -use pxar::Metadata; use pxar::accessor::aio::{Accessor, FileContents, FileEntry}; +use pxar::decoder::aio::Decoder; +use pxar::format::Device; +use pxar::{Entry, EntryKind, Metadata}; use proxmox::c_result; use proxmox::tools::{ @@ -93,8 +94,6 @@ where let mut err_path_stack = vec![OsString::from("/")]; let mut current_match = options.extract_match_default; while let Some(entry) = decoder.next() { - use pxar::EntryKind; - let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?; let file_name_os = entry.file_name(); @@ -552,7 +551,6 @@ where T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static, W: tokio::io::AsyncWrite + Unpin + Send + 'static, { - use pxar::EntryKind; Box::pin(async move { let metadata = file.entry().metadata(); let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf(); @@ -612,10 
+610,42 @@ where }) } +fn get_extractor(destination: DEST, metadata: Metadata) -> Result +where + DEST: AsRef, +{ + create_path( + &destination, + None, + Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), + ) + .map_err(|err| { + format_err!( + "error creating directory {:?}: {}", + destination.as_ref(), + err + ) + })?; + + let dir = Dir::open( + destination.as_ref(), + OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, + Mode::empty(), + ) + .map_err(|err| { + format_err!( + "unable to open target directory {:?}: {}", + destination.as_ref(), + err, + ) + })?; + + Ok(Extractor::new(dir, metadata, false, Flags::DEFAULT)) +} pub async fn extract_sub_dir( destination: DEST, - mut decoder: Accessor, + decoder: Accessor, path: PATH, verbose: bool, ) -> Result<(), Error> @@ -626,111 +656,205 @@ where { let root = decoder.open_root().await?; - create_path( - &destination, - None, - Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))), - ) - .map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?; - - let dir = Dir::open( - destination.as_ref(), - OFlag::O_DIRECTORY | OFlag::O_CLOEXEC, - Mode::empty(), - ) - .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?; - - let mut extractor = Extractor::new( - dir, + let mut extractor = get_extractor( + destination, root.lookup_self().await?.entry().metadata().clone(), - false, - Flags::DEFAULT, - ); + )?; let file = root - .lookup(&path).await? + .lookup(&path) + .await? 
.ok_or(format_err!("error opening '{:?}'", path.as_ref()))?; - recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await + recurse_files_extractor(&mut extractor, file, verbose).await } -fn recurse_files_extractor<'a, T>( +pub async fn extract_sub_dir_seq( + destination: DEST, + mut decoder: Decoder, + verbose: bool, +) -> Result<(), Error> +where + S: pxar::decoder::SeqRead + Unpin + Send + 'static, + DEST: AsRef, +{ + decoder.enable_goodbye_entries(true); + let root = match decoder.next().await { + Some(Ok(root)) => root, + Some(Err(err)) => bail!("error getting root entry from pxar: {}", err), + None => bail!("cannot extract empty archive"), + }; + + let mut extractor = get_extractor(destination, root.metadata().clone())?; + + if let Err(err) = seq_files_extractor(&mut extractor, decoder, verbose).await { + eprintln!("error extracting pxar archive: {}", err); + } + + Ok(()) +} + +fn extract_special( + extractor: &mut Extractor, + entry: &Entry, + file_name: &CStr, +) -> Result<(), Error> { + let metadata = entry.metadata(); + match entry.kind() { + EntryKind::Symlink(link) => { + extractor.extract_symlink(file_name, metadata, link.as_ref())?; + } + EntryKind::Hardlink(link) => { + extractor.extract_hardlink(file_name, link.as_os_str())?; + } + EntryKind::Device(dev) => { + if extractor.contains_flags(Flags::WITH_DEVICE_NODES) { + extractor.extract_device(file_name, metadata, dev)?; + } + } + EntryKind::Fifo => { + if extractor.contains_flags(Flags::WITH_FIFOS) { + extractor.extract_special(file_name, metadata, 0)?; + } + } + EntryKind::Socket => { + if extractor.contains_flags(Flags::WITH_SOCKETS) { + extractor.extract_special(file_name, metadata, 0)?; + } + } + _ => bail!("extract_special used with unsupported entry kind"), + } + Ok(()) +} + +fn get_filename(entry: &Entry) -> Result<(OsString, CString), Error> { + let file_name_os = entry.file_name().to_owned(); + + // safety check: a file entry in an archive must never contain slashes: + if 
file_name_os.as_bytes().contains(&b'/') { + bail!("archive file entry contains slashes, which is invalid and a security concern"); + } + + let file_name = CString::new(file_name_os.as_bytes()) + .map_err(|_| format_err!("encountered file name with null-bytes"))?; + + Ok((file_name_os, file_name)) +} + +async fn recurse_files_extractor<'a, T>( extractor: &'a mut Extractor, - decoder: &'a mut Accessor, file: FileEntry, verbose: bool, -) -> Pin> + Send + 'a>> +) -> Result<(), Error> where T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static, { - use pxar::EntryKind; - Box::pin(async move { - let metadata = file.entry().metadata(); - let file_name_os = file.file_name(); + let entry = file.entry(); + let metadata = entry.metadata(); + let (file_name_os, file_name) = get_filename(entry)?; - // safety check: a file entry in an archive must never contain slashes: - if file_name_os.as_bytes().contains(&b'/') { - bail!("archive file entry contains slashes, which is invalid and a security concern"); + if verbose { + eprintln!("extracting: {}", file.path().display()); + } + + match file.kind() { + EntryKind::Directory => { + extractor + .enter_directory(file_name_os.to_owned(), metadata.clone(), true) + .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?; + + let dir = file.enter_directory().await?; + let mut seq_decoder = dir.decode_full().await?; + seq_decoder.enable_goodbye_entries(true); + seq_files_extractor(extractor, seq_decoder, verbose).await?; + extractor.leave_directory()?; } - - let file_name = CString::new(file_name_os.as_bytes()) - .map_err(|_| format_err!("encountered file name with null-bytes"))?; - - if verbose { - eprintln!("extracting: {}", file.path().display()); + EntryKind::File { size, .. } => { + extractor + .async_extract_file( + &file_name, + metadata, + *size, + &mut file.contents().await.map_err(|_| { + format_err!("found regular file entry without contents in archive") + })?, + ) + .await? 
} - - match file.kind() { - EntryKind::Directory => { - extractor - .enter_directory(file_name_os.to_owned(), metadata.clone(), true) - .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?; - - let dir = file.enter_directory().await?; - let mut readdir = dir.read_dir(); - while let Some(entry) = readdir.next().await { - let entry = entry?.decode_entry().await?; - let filename = entry.path().to_path_buf(); - - // log errors and continue - if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await { - eprintln!("error extracting {:?}: {}", filename.display(), err); - } - } - extractor.leave_directory()?; - } - EntryKind::Symlink(link) => { - extractor.extract_symlink(&file_name, metadata, link.as_ref())?; - } - EntryKind::Hardlink(link) => { - extractor.extract_hardlink(&file_name, link.as_os_str())?; - } - EntryKind::Device(dev) => { - if extractor.contains_flags(Flags::WITH_DEVICE_NODES) { - extractor.extract_device(&file_name, metadata, dev)?; - } - } - EntryKind::Fifo => { - if extractor.contains_flags(Flags::WITH_FIFOS) { - extractor.extract_special(&file_name, metadata, 0)?; - } - } - EntryKind::Socket => { - if extractor.contains_flags(Flags::WITH_SOCKETS) { - extractor.extract_special(&file_name, metadata, 0)?; - } - } - EntryKind::File { size, .. 
} => extractor.async_extract_file( - &file_name, - metadata, - *size, - &mut file.contents().await.map_err(|_| { - format_err!("found regular file entry without contents in archive") - })?, - ).await?, - EntryKind::GoodbyeTable => {}, // ignore - } - Ok(()) - }) + EntryKind::GoodbyeTable => {} // ignore + _ => extract_special(extractor, entry, &file_name)?, + } + Ok(()) } +async fn seq_files_extractor<'a, T>( + extractor: &'a mut Extractor, + mut decoder: pxar::decoder::aio::Decoder, + verbose: bool, +) -> Result<(), Error> +where + T: pxar::decoder::SeqRead, +{ + let mut dir_level = 0; + loop { + let entry = match decoder.next().await { + Some(entry) => entry?, + None => return Ok(()), + }; + + let metadata = entry.metadata(); + let (file_name_os, file_name) = get_filename(&entry)?; + + if verbose && !matches!(entry.kind(), EntryKind::GoodbyeTable) { + eprintln!("extracting: {}", entry.path().display()); + } + + if let Err(err) = async { + match entry.kind() { + EntryKind::Directory => { + dir_level += 1; + extractor + .enter_directory(file_name_os.to_owned(), metadata.clone(), true) + .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?; + } + EntryKind::File { size, .. } => { + extractor + .async_extract_file( + &file_name, + metadata, + *size, + &mut decoder.contents().ok_or_else(|| { + format_err!("found regular file entry without contents in archive") + })?, + ) + .await? 
+ } + EntryKind::GoodbyeTable => { + dir_level -= 1; + extractor.leave_directory()?; + } + _ => extract_special(extractor, &entry, &file_name)?, + } + Ok(()) as Result<(), Error> + } + .await + { + let display = entry.path().display().to_string(); + eprintln!( + "error extracting {}: {}", + if matches!(entry.kind(), EntryKind::GoodbyeTable) { + "" + } else { + &display + }, + err + ); + } + + if dir_level < 0 { + // we've encountered one Goodbye more than Directory, meaning we've left the dir we + // started in - exit early, otherwise the extractor might panic + return Ok(()); + } + } +} diff --git a/src/pxar/mod.rs b/src/pxar/mod.rs index d1302962..13eb9bd4 100644 --- a/src/pxar/mod.rs +++ b/src/pxar/mod.rs @@ -59,7 +59,10 @@ mod flags; pub use flags::Flags; pub use create::{create_archive, PxarCreateOptions}; -pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions}; +pub use extract::{ + create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler, + PxarExtractOptions, +}; /// The format requires to build sorted directory lookup tables in /// memory, so we restrict the number of allowed entries to limit -- 2.20.1