From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 2D797CB15 for ; Tue, 12 Apr 2022 13:04:23 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 183EDDCA5 for ; Tue, 12 Apr 2022 13:04:23 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id D3C39DC3D for ; Tue, 12 Apr 2022 13:04:19 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id ABD51402F1 for ; Tue, 12 Apr 2022 13:04:19 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Tue, 12 Apr 2022 13:04:16 +0200 Message-Id: <20220412110418.3360746-5-d.csapak@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220412110418.3360746-1-d.csapak@proxmox.com> References: <20220412110418.3360746-1-d.csapak@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.139 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[mod.rs, extract.rs] Subject: [pbs-devel] [PATCH proxmox-backup 1/3] pbs-client: add 'create_tar' helper function X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 12 Apr 2022 11:04:23 -0000 Similar to create_zip, this uses an accessor to write a tar into an output that implements AsyncWrite, but we use a Decoder to iterate instead of having a recursive function. This is done so that we get the entries in the correct order, and it should be faster as well. Includes files, directories, symlinks, hardlinks, block/char devices and fifos in the tar. If a hardlink points outside the directory being archived, promote the first instance to a 'real' file, and use a hardlink for the rest. Signed-off-by: Dominik Csapak --- pbs-client/Cargo.toml | 1 + pbs-client/src/pxar/extract.rs | 211 ++++++++++++++++++++++++++++++++- pbs-client/src/pxar/mod.rs | 2 +- 3 files changed, 211 insertions(+), 3 deletions(-) diff --git a/pbs-client/Cargo.toml b/pbs-client/Cargo.toml index d713a3ca..68a777b0 100644 --- a/pbs-client/Cargo.toml +++ b/pbs-client/Cargo.toml @@ -27,6 +27,7 @@ tokio = { version = "1.6", features = [ "fs", "signal" ] } tokio-stream = "0.1.0" tower-service = "0.3.0" xdg = "2.2" +tar = "0.4" pathpatterns = "0.1.2" diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs index b1f8718e..a0efcbe4 100644 --- a/pbs-client/src/pxar/extract.rs +++ b/pbs-client/src/pxar/extract.rs @@ -1,9 +1,10 @@ //! Code for extraction of pxar contents onto the file system. 
+use std::collections::HashMap;
 use std::convert::TryFrom;
 use std::ffi::{CStr, CString, OsStr, OsString};
 use std::io;
-use std::os::unix::ffi::OsStrExt;
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
@@ -17,7 +18,7 @@ use nix::sys::stat::Mode;
 
 use pathpatterns::{MatchEntry, MatchList, MatchType};
 use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
-use pxar::decoder::aio::Decoder;
+use pxar::decoder::{aio::Decoder, Contents};
 use pxar::format::Device;
 use pxar::{Entry, EntryKind, Metadata};
 
@@ -501,6 +502,229 @@ impl Extractor {
     }
 }
 
+/// Copies mode, mtime, uid and gid from the pxar `metadata` into the tar `header`.
+fn add_metadata_to_header(header: &mut tar::Header, metadata: &Metadata) {
+    header.set_mode(metadata.stat.mode as u32);
+    header.set_mtime(metadata.stat.mtime.secs as u64);
+    header.set_uid(metadata.stat.uid as u64);
+    header.set_gid(metadata.stat.gid as u64);
+}
+
+/// Adds a regular file entry of `size` bytes at `path` to `tar`.
+///
+/// The data is read from `contents`; if that is `None`, an empty reader is used instead.
+async fn tar_add_file<'a, W, T>(
+    tar: &mut proxmox_compression::tar::Builder<W>,
+    contents: Option<Contents<'a, T>>,
+    size: u64,
+    metadata: &Metadata,
+    path: &Path,
+) -> Result<(), Error>
+where
+    T: pxar::decoder::SeqRead + Unpin + Send + Sync + 'static,
+    W: tokio::io::AsyncWrite + Unpin + Send + 'static,
+{
+    let mut header = tar::Header::new_gnu();
+    header.set_entry_type(tar::EntryType::Regular);
+    header.set_size(size);
+    add_metadata_to_header(&mut header, metadata);
+    header.set_cksum();
+    match contents {
+        Some(content) => tar.add_entry(&mut header, path, content).await,
+        None => tar.add_entry(&mut header, path, tokio::io::empty()).await,
+    }
+    .map_err(|err| format_err!("could not send file entry: {}", err))?;
+    Ok(())
+}
+
+/// Converts the raw bytes of a link target into a [`PathBuf`].
+///
+/// A single trailing `'\0'` is removed if present. Input without one (including an empty
+/// slice) is converted as-is instead of panicking or losing its last byte.
+fn link_to_pathbuf(link: &[u8]) -> PathBuf {
+    let bytes = match link.split_last() {
+        Some((0, rest)) => rest,
+        _ => link,
+    };
+    OsString::from_vec(bytes.to_vec()).into()
+}
+
+/// Creates a tar archive from the directory at `path` and writes it into `output`.
+///
+/// Regular files, directories, symlinks, hardlinks, block/char devices and fifos are added,
+/// all other entry kinds are ignored. Entry names are relative to the parent of `path`.
+/// The first hardlink whose target is not below that parent is added with `tar_add_file`
+/// instead; later hardlinks to the same target are stored as links to that first entry.
+///
+/// If `path` cannot be entered as a directory, no entries are added. With `verbose` set,
+/// progress messages are printed to stderr.
+pub async fn create_tar<T, W, P>(
+    output: W,
+    accessor: Accessor<T>,
+    path: P,
+    verbose: bool,
+) -> Result<(), Error>
+where
+    T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
+    W: tokio::io::AsyncWrite + Unpin + Send + 'static,
+    P: AsRef<Path>,
+{
+    let root = accessor.open_root().await?;
+    let file = root
+        .lookup(&path)
+        .await?
+        .ok_or_else(|| format_err!("error opening '{:?}'", path.as_ref()))?;
+
+    // everything but the last component of the requested path is stripped from entry names
+    let mut prefix = PathBuf::new();
+    let mut components = file.entry().path().components();
+    components.next_back(); // discard last
+    for comp in components {
+        prefix.push(comp);
+    }
+
+    let mut tarencoder = proxmox_compression::tar::Builder::new(output);
+    let mut hardlinks: HashMap<PathBuf, PathBuf> = HashMap::new();
+
+    if let Ok(dir) = file.enter_directory().await {
+        let mut decoder = dir.decode_full().await?;
+        decoder.enable_goodbye_entries(false);
+        while let Some(entry) = decoder.next().await {
+            let entry = entry.map_err(|err| format_err!("cannot decode entry: {}", err))?;
+
+            let metadata = entry.metadata();
+            let path = entry.path().strip_prefix(&prefix)?.to_path_buf();
+
+            match entry.kind() {
+                EntryKind::File { .. } => {
+                    let size = decoder.content_size().unwrap_or(0);
+                    tar_add_file(&mut tarencoder, decoder.contents(), size, metadata, &path)
+                        .await?
+                }
+                EntryKind::Hardlink(link) => {
+                    if !link.data.is_empty() {
+                        let entry = root
+                            .lookup(&path)
+                            .await?
+                            .ok_or_else(|| format_err!("error looking up '{:?}'", path))?;
+                        let realfile = accessor.follow_hardlink(&entry).await?;
+                        let metadata = realfile.entry().metadata();
+                        let realpath = link_to_pathbuf(&link.data);
+
+                        if verbose {
+                            eprintln!("adding '{}' to tar", path.display());
+                        }
+
+                        let stripped_path = match realpath.strip_prefix(&prefix) {
+                            Ok(path) => path,
+                            Err(_) => {
+                                // outside of our tar archive, add the first occurrence to the tar
+                                if let Some(path) = hardlinks.get(&realpath) {
+                                    path
+                                } else {
+                                    let size = decoder.content_size().unwrap_or(0);
+                                    tar_add_file(
+                                        &mut tarencoder,
+                                        decoder.contents(),
+                                        size,
+                                        metadata,
+                                        &path,
+                                    )
+                                    .await?;
+                                    hardlinks.insert(realpath, path);
+                                    continue;
+                                }
+                            }
+                        };
+                        let mut header = tar::Header::new_gnu();
+                        header.set_entry_type(tar::EntryType::Link);
+                        add_metadata_to_header(&mut header, metadata);
+                        header.set_size(0);
+                        tarencoder
+                            .add_link(&mut header, path, stripped_path)
+                            .await
+                            .map_err(|err| format_err!("could not send hardlink entry: {}", err))?;
+                    }
+                }
+                EntryKind::Symlink(link) if !link.data.is_empty() => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let realpath = link_to_pathbuf(&link.data);
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(tar::EntryType::Symlink);
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    tarencoder
+                        .add_link(&mut header, path, realpath)
+                        .await
+                        .map_err(|err| format_err!("could not send symlink entry: {}", err))?;
+                }
+                EntryKind::Fifo => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(tar::EntryType::Fifo);
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    header.set_device_major(0)?;
+                    header.set_device_minor(0)?;
+                    header.set_cksum();
+                    tarencoder
+                        .add_entry(&mut header, path, tokio::io::empty())
+                        .await
+                        .map_err(|err| format_err!("could not send fifo entry: {}", err))?;
+                }
+                EntryKind::Directory => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    // we cannot add the root path itself
+                    if path != Path::new("/") {
+                        let mut header = tar::Header::new_gnu();
+                        header.set_entry_type(tar::EntryType::Directory);
+                        add_metadata_to_header(&mut header, metadata);
+                        header.set_size(0);
+                        header.set_cksum();
+                        tarencoder
+                            .add_entry(&mut header, path, tokio::io::empty())
+                            .await
+                            .map_err(|err| format_err!("could not send dir entry: {}", err))?;
+                    }
+                }
+                EntryKind::Device(device) => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let entry_type = if metadata.stat.is_chardev() {
+                        tar::EntryType::Char
+                    } else {
+                        tar::EntryType::Block
+                    };
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(entry_type);
+                    header.set_device_major(device.major as u32)?;
+                    header.set_device_minor(device.minor as u32)?;
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    tarencoder
+                        .add_entry(&mut header, path, tokio::io::empty())
+                        .await
+                        .map_err(|err| format_err!("could not send device entry: {}", err))?;
+                }
+                _ => {} // ignore all else
+            }
+        }
+    }
+
+    tarencoder.finish().await.map_err(|err| {
+        eprintln!("error during finishing of tar: {}", err);
+        err
+    })?;
+    Ok(())
+}
+
 pub async fn create_zip<T, W, P>(
     output: W,
     decoder: Accessor<T>,
diff --git a/pbs-client/src/pxar/mod.rs b/pbs-client/src/pxar/mod.rs
index f20a1f9e..725fc2d9 100644
--- a/pbs-client/src/pxar/mod.rs
+++ b/pbs-client/src/pxar/mod.rs
@@ -59,7 +59,7 @@ pub use flags::Flags;
 
 pub use create::{create_archive, PxarCreateOptions};
 pub use extract::{
-    create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
+    create_tar, create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
     PxarExtractOptions,
 };
 
-- 
2.30.2