From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC PATCH proxmox-backup 3/3] tools/zip: compress zips with deflate
Date: Mon, 15 Mar 2021 12:21:18 +0100 [thread overview]
Message-ID: <20210315112118.13641-3-d.csapak@proxmox.com> (raw)
In-Reply-To: <20210315112118.13641-1-d.csapak@proxmox.com>
to get smaller zip files
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
@Wolfgang, could you please look at this? I am not sure about using
the Compress in an async function. It is only in memory, but does it
'block'? I am not sure how we could do this differently in an
async context though...
Cargo.toml | 1 +
src/tools/zip.rs | 75 +++++++++++++++++++++++++++++++++++++++---------
2 files changed, 63 insertions(+), 13 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 79945312..06967c20 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ crc32fast = "1"
endian_trait = { version = "0.6", features = ["arrays"] }
anyhow = "1.0"
futures = "0.3"
+flate2 = "1.0"
h2 = { version = "0.3", features = [ "stream" ] }
handlebars = "3.0"
http = "0.2"
diff --git a/src/tools/zip.rs b/src/tools/zip.rs
index 55f2a24a..237b8a1f 100644
--- a/src/tools/zip.rs
+++ b/src/tools/zip.rs
@@ -11,9 +11,10 @@ use std::mem::size_of;
use std::os::unix::ffi::OsStrExt;
use std::path::{Component, Path, PathBuf};
-use anyhow::{Error, Result};
+use anyhow::{bail, Error, Result};
use endian_trait::Endian;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+use flate2::{Compress, Compression, FlushCompress};
use crc32fast::Hasher;
use proxmox::tools::time::gmtime;
@@ -245,7 +246,7 @@ impl ZipEntry {
signature: LOCAL_FH_SIG,
version_needed: 0x2d,
flags: 1 << 3,
- compression: 0,
+ compression: 0x8,
time,
date,
crc32: 0,
@@ -328,7 +329,7 @@ impl ZipEntry {
version_made_by: VERSION_MADE_BY,
version_needed: VERSION_NEEDED,
flags: 1 << 3,
- compression: 0,
+ compression: 0x8,
time,
date,
crc32: self.crc32,
@@ -402,6 +403,7 @@ where
files: Vec<ZipEntry>,
target: W,
buf: ByteBuffer,
+ outbuf: ByteBuffer,
}
impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
@@ -410,7 +412,8 @@ impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
byte_count: 0,
files: Vec::new(),
target,
- buf: ByteBuffer::with_capacity(1024*1024),
+ buf: ByteBuffer::with_capacity(1024 * 1024),
+ outbuf: ByteBuffer::with_capacity(1024 * 1024),
}
}
@@ -423,25 +426,71 @@ impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
self.byte_count += entry.write_local_header(&mut self.target).await?;
if let Some(mut content) = content {
let mut hasher = Hasher::new();
- let mut size = 0;
+ let mut deflate_encoder = Compress::new(Compression::fast(), false);
+
loop {
+ let syncmode = if self.buf.is_full() {
+ FlushCompress::Sync
+ } else {
+ FlushCompress::None
+ };
+
+ let old_pos = self.buf.len();
let count = self.buf.read_from_async(&mut content).await?;
// end of file
- if count == 0 {
+ if count == 0 && syncmode == FlushCompress::None {
break;
}
- size += count;
- hasher.update(&self.buf);
- self.target.write_all(&self.buf).await?;
- self.buf.consume(count);
+ hasher.update(&self.buf[old_pos..]);
+
+ let old_read = deflate_encoder.total_in();
+ let old_write = deflate_encoder.total_out();
+ deflate_encoder.compress(
+ &self.buf,
+ &mut self.outbuf.get_free_mut_slice(),
+ syncmode,
+ )?;
+ let read = (deflate_encoder.total_in() - old_read) as usize;
+ let write = (deflate_encoder.total_out() - old_write) as usize;
+
+ self.outbuf.add_size(write);
+
+ if read == 0 {
+ bail!("did not consume any data!");
+ }
+
+ self.target.write_all(&self.outbuf).await?;
+ self.buf.consume(read);
+ self.outbuf.clear();
}
- self.byte_count += size;
- entry.compressed_size = size.try_into()?;
- entry.uncompressed_size = size.try_into()?;
+ let old_read = deflate_encoder.total_in();
+ let old_write = deflate_encoder.total_out();
+ deflate_encoder.compress(
+ &self.buf,
+ &mut self.outbuf.get_free_mut_slice(),
+ FlushCompress::Finish,
+ )?;
+ let read = (deflate_encoder.total_in() - old_read) as usize;
+ let write = (deflate_encoder.total_out() - old_write) as usize;
+
+ self.outbuf.add_size(write);
+
+ if read != self.buf.len() {
+ bail!("deflate did not use all input bytes!");
+ }
+
+ self.target.write_all(&self.outbuf).await?;
+ self.buf.clear();
+ self.outbuf.clear();
+
+ self.byte_count += deflate_encoder.total_out() as usize;
+ entry.compressed_size = deflate_encoder.total_out();
+ entry.uncompressed_size = deflate_encoder.total_in();
+
entry.crc32 = hasher.finalize();
}
self.byte_count += entry.write_data_descriptor(&mut self.target).await?;
--
2.20.1
next prev parent reply other threads:[~2021-03-15 11:21 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-15 11:21 [pbs-devel] [PATCH proxmox-backup 1/3] tools/zip: add missing start_disk field for zip64 extension Dominik Csapak
2021-03-15 11:21 ` [pbs-devel] [RFC PATCH proxmox-backup 2/3] tools/zip: only add zip64 field when necessary Dominik Csapak
2021-03-16 8:13 ` [pbs-devel] applied: " Dietmar Maurer
2021-03-15 11:21 ` Dominik Csapak [this message]
2021-03-16 8:14 ` [pbs-devel] [RFC PATCH proxmox-backup 3/3] tools/zip: compress zips with deflate Dietmar Maurer
2021-03-15 12:02 ` [pbs-devel] applied: [PATCH proxmox-backup 1/3] tools/zip: add missing start_disk field for zip64 extension Thomas Lamprecht
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210315112118.13641-3-d.csapak@proxmox.com \
--to=d.csapak@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal