From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox 1/2] proxmox-compression: add async tar builder
Date: Tue, 12 Apr 2022 13:04:13 +0200 [thread overview]
Message-ID: <20220412110418.3360746-2-d.csapak@proxmox.com> (raw)
In-Reply-To: <20220412110418.3360746-1-d.csapak@proxmox.com>
inspired by tar::Builder, but limited to the things we need and using
AsyncRead+AsyncWrite instead of the sync variants.
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
proxmox-compression/Cargo.toml | 1 +
proxmox-compression/src/lib.rs | 1 +
proxmox-compression/src/tar.rs | 172 +++++++++++++++++++++++++++++++++
3 files changed, 174 insertions(+)
create mode 100644 proxmox-compression/src/tar.rs
diff --git a/proxmox-compression/Cargo.toml b/proxmox-compression/Cargo.toml
index 0b9edf5..c3f7f49 100644
--- a/proxmox-compression/Cargo.toml
+++ b/proxmox-compression/Cargo.toml
@@ -17,6 +17,7 @@ flate2 = "1.0"
futures = "0.3"
tokio = { version = "1.6", features = [ "fs", "io-util"] }
walkdir = "2"
+tar = "0.4"
proxmox-time = { path = "../proxmox-time", version = "1" }
proxmox-io = { path = "../proxmox-io", version = "1", features = [ "tokio" ] }
diff --git a/proxmox-compression/src/lib.rs b/proxmox-compression/src/lib.rs
index 05cf06b..e9dd113 100644
--- a/proxmox-compression/src/lib.rs
+++ b/proxmox-compression/src/lib.rs
@@ -1,4 +1,5 @@
mod compression;
pub use compression::*;
+pub mod tar;
pub mod zip;
diff --git a/proxmox-compression/src/tar.rs b/proxmox-compression/src/tar.rs
new file mode 100644
index 0000000..59a8cc1
--- /dev/null
+++ b/proxmox-compression/src/tar.rs
@@ -0,0 +1,172 @@
+//! tar helper
+use std::io;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Component, Path, PathBuf};
+use std::str;
+
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+
+use tar::{EntryType, Header};
+
+/// An async Builder for tar archives based on [tar::Builder]
+///
+/// Wraps an inner [AsyncWrite] struct to write into.
+/// [finish()](Builder::finish) must be called at the end: it writes the
+/// archive trailer, flushes the inner writer and hands it back.
+///
+/// # Example
+///
+/// ```
+/// use tar::{EntryType, Header};
+/// use proxmox_compression::tar::Builder;
+///
+/// # async fn foo() {
+/// let mut tar = Builder::new(Vec::new());
+///
+/// // Add file
+/// let mut header = Header::new_gnu();
+/// let mut data: &[u8] = &[1, 2, 3];
+/// header.set_size(data.len() as u64);
+/// tar.add_entry(&mut header, "foo", data).await.unwrap();
+///
+/// // Add symlink
+/// let mut header = Header::new_gnu();
+/// header.set_entry_type(EntryType::Symlink);
+/// tar.add_link(&mut header, "bar", "foo").await.unwrap();
+///
+/// // must call finish at the end
+/// let data = tar.finish().await.unwrap();
+/// # }
+/// ```
+pub struct Builder<W: AsyncWrite + Unpin> {
+ // the writer all archive data goes to; handed back by `finish()`
+ inner: W,
+}
+
+impl<W: AsyncWrite + Unpin> Builder<W> {
+    /// Creates a builder that writes the archive into `inner`.
+    pub fn new(inner: W) -> Builder<W> {
+        Self { inner }
+    }
+
+    /// Writes `header` followed by the contents of `data` to the inner writer.
+    async fn add<R: AsyncRead + Unpin>(
+        &mut self,
+        header: &Header,
+        mut data: R,
+    ) -> io::Result<()> {
+        let writer = &mut self.inner;
+        append_data(writer, header, &mut data).await
+    }
+
+    /// Adds a new entry to this archive with the specified path.
+    ///
+    /// The path is stored in `header` (preceded by a GNU long name extension
+    /// entry if it does not fit), the header checksum is recomputed and the
+    /// header is written followed by everything read from `data`.
+    pub async fn add_entry<P: AsRef<Path>, R: AsyncRead + Unpin>(
+        &mut self,
+        header: &mut Header,
+        path: P,
+        data: R,
+    ) -> io::Result<()> {
+        let entry_path = path.as_ref();
+        append_path_header(&mut self.inner, header, entry_path).await?;
+
+        // the name field was modified above, so the checksum must be refreshed
+        header.set_cksum();
+        self.add(header, data).await
+    }
+
+    /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
+    ///
+    /// Both the path and the link target fall back to a GNU extension entry
+    /// when they cannot be stored in the header itself. The link entry is
+    /// written without any data.
+    pub async fn add_link<P: AsRef<Path>, T: AsRef<Path>>(
+        &mut self,
+        header: &mut Header,
+        path: P,
+        target: T,
+    ) -> io::Result<()> {
+        append_path_header(&mut self.inner, header, path.as_ref()).await?;
+
+        // try to set the link name, fall back to the gnu extension header otherwise
+        let link_target = target.as_ref();
+        match header.set_link_name(link_target) {
+            Ok(()) => {}
+            Err(err) => {
+                let raw_target = link_target.as_os_str().as_bytes();
+                let field_len = header.as_old().linkname.len();
+                // the error is only handled via the extension if the target is
+                // at least as long as the header field, anything else is passed on
+                if raw_target.len() < field_len {
+                    return Err(err);
+                }
+
+                // the extension payload is the target plus a trailing '\0'
+                let extension =
+                    get_gnu_header(raw_target.len() as u64 + 1, EntryType::GNULongLink);
+                let mut payload = raw_target.chain(tokio::io::repeat(0).take(1));
+                append_data(&mut self.inner, &extension, &mut payload).await?;
+            }
+        }
+
+        header.set_cksum();
+        self.add(header, tokio::io::empty()).await
+    }
+
+    /// Finish the archive and flush the underlying writer
+    ///
+    /// Consumes the Builder. This must be called when finishing the archive.
+    /// Writes 1024 zero bytes as trailer, flushes the inner writer and
+    /// returns it.
+    pub async fn finish(mut self) -> io::Result<W> {
+        let trailer = [0u8; 1024];
+        self.inner.write_all(&trailer).await?;
+        self.inner.flush().await?;
+        Ok(self.inner)
+    }
+}
+
+/// Writes `header` and then all of `data` to `dst`, zero-padding the data to
+/// the next multiple of 512 bytes.
+///
+/// The number of bytes copied from `data` is not compared against the size
+/// recorded in `header`.
+async fn append_data<W: AsyncWrite + Unpin, R: AsyncRead + Unpin>(
+    dst: &mut W,
+    header: &Header,
+    data: &mut R,
+) -> io::Result<()> {
+    const BLOCK_SIZE: u64 = 512;
+
+    dst.write_all(header.as_bytes()).await?;
+    let written = tokio::io::copy(&mut *data, &mut *dst).await?;
+
+    // Fill up the last, partially written block with zeros.
+    let tail = written % BLOCK_SIZE;
+    if tail != 0 {
+        let padding = [0u8; BLOCK_SIZE as usize];
+        let missing = (BLOCK_SIZE - tail) as usize;
+        dst.write_all(&padding[..missing]).await?;
+    }
+
+    Ok(())
+}
+
+/// Builds the header of a GNU extension entry carrying `size` bytes of data.
+///
+/// The entry is named `././@LongLink`, gets mode 0644 and zeroed uid, gid and
+/// mtime; `entry_type` selects the kind of extension. The checksum is already
+/// set on the returned header.
+fn get_gnu_header(size: u64, entry_type: EntryType) -> Header {
+    const EXTENSION_NAME: &[u8] = b"././@LongLink";
+
+    let mut header = Header::new_gnu();
+
+    // the header was created via new_gnu() right above
+    let gnu = header.as_gnu_mut().unwrap();
+    gnu.name[..EXTENSION_NAME.len()].copy_from_slice(EXTENSION_NAME);
+
+    header.set_entry_type(entry_type);
+    header.set_size(size);
+    header.set_mode(0o644);
+    header.set_uid(0);
+    header.set_gid(0);
+    header.set_mtime(0);
+
+    // must come last so that it covers all fields set above
+    header.set_cksum();
+    header
+}
+
+// Stores `path` (without root directory components) in `header`. If that
+// fails and the path is at least as long as the header's name field, a gnu
+// 'LongName' extension entry with the full path is written to `dst` first and
+// the header only gets a truncated version of the path.
+async fn append_path_header<W: AsyncWrite + Unpin>(
+    dst: &mut W,
+    header: &mut Header,
+    path: &Path,
+) -> io::Result<()> {
+    // drop root components so the stored path is relative
+    let relpath: PathBuf = path
+        .components()
+        .filter(|comp| *comp != Component::RootDir)
+        .collect();
+
+    // the common case: the path can be stored directly
+    let err = match header.set_path(&relpath) {
+        Ok(()) => return Ok(()),
+        Err(err) => err,
+    };
+
+    let raw = relpath.as_os_str().as_bytes();
+    let max = header.as_old().name.len();
+    // shorter than the name field: the failure was not handled by the
+    // extension, so pass the original error on
+    if raw.len() < max {
+        return Err(err);
+    }
+
+    // the extension payload is the full path plus a trailing '\0'
+    let extension = get_gnu_header(raw.len() as u64 + 1, EntryType::GNULongName);
+    let mut payload = raw.chain(tokio::io::repeat(0).take(1));
+    append_data(dst, &extension, &mut payload).await?;
+
+    // add the path as far as we can; if the cut lands inside a multi-byte
+    // sequence only the valid utf-8 part before it is used
+    let truncated = str::from_utf8(&raw[..max])
+        .or_else(|utf8_err| str::from_utf8(&raw[..utf8_err.valid_up_to()]))
+        .unwrap();
+    header.set_path(truncated)
+}
--
2.30.2
next prev parent reply other threads:[~2022-04-12 11:04 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-12 11:04 [pbs-devel] [PATCH proxmox/widget-toolkit/proxmox-backup] add tar.zst support for file download Dominik Csapak
2022-04-12 11:04 ` Dominik Csapak [this message]
2022-04-13 7:36 ` [pbs-devel] applied-series: [PATCH proxmox 1/2] proxmox-compression: add async tar builder Wolfgang Bumiller
2022-04-12 11:04 ` [pbs-devel] [PATCH proxmox 2/2] proxmox-compression: add streaming zstd encoder Dominik Csapak
2022-04-12 11:04 ` [pbs-devel] [PATCH widget-toolkit 1/1] window/FileBrowser: add optional 'tar.zst' button Dominik Csapak
2022-04-13 8:37 ` [pbs-devel] applied: " Wolfgang Bumiller
2022-04-12 11:04 ` [pbs-devel] [PATCH proxmox-backup 1/3] pbs-client: add 'create_tar' helper function Dominik Csapak
2022-04-13 8:34 ` [pbs-devel] applied-series: " Wolfgang Bumiller
2022-04-12 11:04 ` [pbs-devel] [PATCH proxmox-backup 2/3] api: admin/datastore: add tar support for pxar_file_download Dominik Csapak
2022-04-12 11:04 ` [pbs-devel] [PATCH proxmox-backup 3/3] ui: datastore/Content: enable tar download in ui Dominik Csapak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220412110418.3360746-2-d.csapak@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal