From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 6D139CB2B for ; Tue, 12 Apr 2022 13:04:52 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 6401ADC4E for ; Tue, 12 Apr 2022 13:04:22 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id A1076DC2A for ; Tue, 12 Apr 2022 13:04:19 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 75C5B402F1 for ; Tue, 12 Apr 2022 13:04:19 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Tue, 12 Apr 2022 13:04:13 +0200 Message-Id: <20220412110418.3360746-2-d.csapak@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220412110418.3360746-1-d.csapak@proxmox.com> References: <20220412110418.3360746-1-d.csapak@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.139 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[tar.rs, lib.rs] Subject: [pbs-devel] [PATCH proxmox 1/2] proxmox-compression: add async tar builder X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 12 Apr 2022 11:04:52 -0000 inspired by tar::Builder, but limited to the things we need and using AsyncRead+AsyncWrite instead of the sync variants. Signed-off-by: Dominik Csapak --- proxmox-compression/Cargo.toml | 1 + proxmox-compression/src/lib.rs | 1 + proxmox-compression/src/tar.rs | 172 +++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+) create mode 100644 proxmox-compression/src/tar.rs diff --git a/proxmox-compression/Cargo.toml b/proxmox-compression/Cargo.toml index 0b9edf5..c3f7f49 100644 --- a/proxmox-compression/Cargo.toml +++ b/proxmox-compression/Cargo.toml @@ -17,6 +17,7 @@ flate2 = "1.0" futures = "0.3" tokio = { version = "1.6", features = [ "fs", "io-util"] } walkdir = "2" +tar = "0.4" proxmox-time = { path = "../proxmox-time", version = "1" } proxmox-io = { path = "../proxmox-io", version = "1", features = [ "tokio" ] } diff --git a/proxmox-compression/src/lib.rs b/proxmox-compression/src/lib.rs index 05cf06b..e9dd113 100644 --- a/proxmox-compression/src/lib.rs +++ b/proxmox-compression/src/lib.rs @@ -1,4 +1,5 @@ mod compression; pub use compression::*; +pub mod tar; pub mod zip; diff --git a/proxmox-compression/src/tar.rs b/proxmox-compression/src/tar.rs new file mode 100644 index 0000000..59a8cc1 --- /dev/null +++ b/proxmox-compression/src/tar.rs @@ -0,0 +1,172 @@ +//! 
tar helper +use std::io; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; +use std::str; + +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; + +use tar::{EntryType, Header}; + +/// An async Builder for tar archives based on [tar::Builder] +/// +/// Wraps an inner [AsyncWrite] struct to write into. +/// Must call [finish()](Builder::finish) to write trailer + close +/// # Example +/// +/// ``` +/// use tar::{EntryType, Header}; +/// use proxmox_compression::tar::Builder; +/// +/// # async fn foo() { +/// let mut tar = Builder::new(Vec::new()); +/// +/// // Add file +/// let mut header = Header::new_gnu(); +/// let mut data: &[u8] = &[1, 2, 3]; +/// header.set_size(data.len() as u64); +/// tar.add_entry(&mut header, "foo", data).await.unwrap(); +/// +/// // Add symlink +/// let mut header = Header::new_gnu(); +/// header.set_entry_type(EntryType::Symlink); +/// tar.add_link(&mut header, "bar", "foo").await.unwrap(); +/// +/// // must call finish at the end +/// let data = tar.finish().await.unwrap(); +/// # } +/// ``` +pub struct Builder { + inner: W, +} + +impl Builder { + /// Takes an AsyncWriter as target + pub fn new(inner: W) -> Builder { + Builder { + inner, + } + } + + async fn add( + &mut self, + header: &Header, + mut data: R, + ) -> io::Result<()> { + append_data(&mut self.inner, header, &mut data).await + } + + /// Adds a new entry to this archive with the specified path. + pub async fn add_entry, R: AsyncRead + Unpin>( + &mut self, + header: &mut Header, + path: P, + data: R, + ) -> io::Result<()> { + append_path_header(&mut self.inner, header, path.as_ref()).await?; + header.set_cksum(); + self.add(&header, data).await + } + + /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target. 
+ pub async fn add_link, T: AsRef>( + &mut self, + header: &mut Header, + path: P, + target: T, + ) -> io::Result<()> { + append_path_header(&mut self.inner, header, path.as_ref()).await?; + + // try to set the link name, fallback to gnu extension header otherwise + if let Err(err) = header.set_link_name(target.as_ref()) { + let link_name = target.as_ref().as_os_str().as_bytes(); + if link_name.len() < header.as_old().linkname.len() { + return Err(err); + } + // add trailing '\0' + let mut ext_data = link_name.chain(tokio::io::repeat(0).take(1)); + let extension = get_gnu_header(link_name.len() as u64 + 1, EntryType::GNULongLink); + append_data(&mut self.inner, &extension, &mut ext_data).await?; + } + header.set_cksum(); + self.add(&header, tokio::io::empty()).await + } + + /// Finish the archive and flush the underlying writer + /// + /// Consumes the Builder. This must be called when finishing the archive. + /// Flushes the inner writer and returns it. + pub async fn finish(mut self) -> io::Result { + self.inner.write_all(&[0; 1024]).await?; + self.inner.flush().await?; + Ok(self.inner) + } +} + +async fn append_data( + mut dst: &mut W, + header: &Header, + mut data: &mut R, +) -> io::Result<()> { + dst.write_all(header.as_bytes()).await?; + let len = tokio::io::copy(&mut data, &mut dst).await?; + + // Pad with zeros if necessary. 
+ let buf = [0; 512]; + let remaining = 512 - (len % 512); + if remaining < 512 { + dst.write_all(&buf[..remaining as usize]).await?; + } + + Ok(()) +} + +fn get_gnu_header(size: u64, entry_type: EntryType) -> Header { + let mut header = Header::new_gnu(); + let name = b"././@LongLink"; + header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]); + header.set_mode(0o644); + header.set_uid(0); + header.set_gid(0); + header.set_mtime(0); + header.set_size(size); + header.set_entry_type(entry_type); + header.set_cksum(); + header +} + +// tries to set the path in header, or add a gnu header with 'LongName' +async fn append_path_header( + dst: &mut W, + header: &mut Header, + path: &Path, +) -> io::Result<()> { + let mut relpath = PathBuf::new(); + let components = path.components(); + for comp in components { + if Component::RootDir == comp { + continue; + } + relpath.push(comp); + } + // try to set the path directly, fallback to gnu extension header otherwise + if let Err(err) = header.set_path(&relpath) { + let data = relpath.as_os_str().as_bytes(); + let max = header.as_old().name.len(); + if data.len() < max { + return Err(err); + } + // add trailing '\0' + let mut ext_data = data.chain(tokio::io::repeat(0).take(1)); + let extension = get_gnu_header(data.len() as u64 + 1, EntryType::GNULongName); + append_data(dst, &extension, &mut ext_data).await?; + + // add the path as far as we can + let truncated = match str::from_utf8(&data[..max]) { + Ok(truncated) => truncated, + Err(err) => str::from_utf8(&data[..err.valid_up_to()]).unwrap(), + }; + header.set_path(truncated)?; + } + Ok(()) +} -- 2.30.2