public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC PATCH proxmox-backup 3/3] tools/zip: compress zips with deflate
Date: Mon, 15 Mar 2021 12:21:18 +0100	[thread overview]
Message-ID: <20210315112118.13641-3-d.csapak@proxmox.com> (raw)
In-Reply-To: <20210315112118.13641-1-d.csapak@proxmox.com>

to get smaller zip files

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
@Wolfgang, could you please look at this? I am not sure about using
flate2's Compress in an async function. It is only in memory, but does it
'block'? I am not sure how we could do this differently in an
async context though...

 Cargo.toml       |  1 +
 src/tools/zip.rs | 75 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 79945312..06967c20 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ crc32fast = "1"
 endian_trait = { version = "0.6", features = ["arrays"] }
 anyhow = "1.0"
 futures = "0.3"
+flate2 = "1.0"
 h2 = { version = "0.3", features = [ "stream" ] }
 handlebars = "3.0"
 http = "0.2"
diff --git a/src/tools/zip.rs b/src/tools/zip.rs
index 55f2a24a..237b8a1f 100644
--- a/src/tools/zip.rs
+++ b/src/tools/zip.rs
@@ -11,9 +11,10 @@ use std::mem::size_of;
 use std::os::unix::ffi::OsStrExt;
 use std::path::{Component, Path, PathBuf};
 
-use anyhow::{Error, Result};
+use anyhow::{bail, Error, Result};
 use endian_trait::Endian;
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+use flate2::{Compress, Compression, FlushCompress};
 
 use crc32fast::Hasher;
 use proxmox::tools::time::gmtime;
@@ -245,7 +246,7 @@ impl ZipEntry {
                 signature: LOCAL_FH_SIG,
                 version_needed: 0x2d,
                 flags: 1 << 3,
-                compression: 0,
+                compression: 0x8,
                 time,
                 date,
                 crc32: 0,
@@ -328,7 +329,7 @@ impl ZipEntry {
                 version_made_by: VERSION_MADE_BY,
                 version_needed: VERSION_NEEDED,
                 flags: 1 << 3,
-                compression: 0,
+                compression: 0x8,
                 time,
                 date,
                 crc32: self.crc32,
@@ -402,6 +403,7 @@ where
     files: Vec<ZipEntry>,
     target: W,
     buf: ByteBuffer,
+    outbuf: ByteBuffer,
 }
 
 impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
@@ -410,7 +412,8 @@ impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
             byte_count: 0,
             files: Vec::new(),
             target,
-            buf: ByteBuffer::with_capacity(1024*1024),
+            buf: ByteBuffer::with_capacity(1024 * 1024),
+            outbuf: ByteBuffer::with_capacity(1024 * 1024),
         }
     }
 
@@ -423,25 +426,71 @@ impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
         self.byte_count += entry.write_local_header(&mut self.target).await?;
         if let Some(mut content) = content {
             let mut hasher = Hasher::new();
-            let mut size = 0;
+            let mut deflate_encoder = Compress::new(Compression::fast(), false);
+
             loop {
 
+                let syncmode = if self.buf.is_full() {
+                    FlushCompress::Sync
+                } else {
+                    FlushCompress::None
+                };
+
+                let old_pos = self.buf.len();
                 let count = self.buf.read_from_async(&mut content).await?;
 
                 // end of file
-                if count == 0 {
+                if count == 0 && syncmode == FlushCompress::None {
                     break;
                 }
 
-                size += count;
-                hasher.update(&self.buf);
-                self.target.write_all(&self.buf).await?;
-                self.buf.consume(count);
+                hasher.update(&self.buf[old_pos..]);
+
+                let old_read = deflate_encoder.total_in();
+                let old_write = deflate_encoder.total_out();
+                deflate_encoder.compress(
+                    &self.buf,
+                    &mut self.outbuf.get_free_mut_slice(),
+                    syncmode,
+                )?;
+                let read = (deflate_encoder.total_in() - old_read) as usize;
+                let write = (deflate_encoder.total_out() - old_write) as usize;
+
+                self.outbuf.add_size(write);
+
+                if read == 0 {
+                    bail!("did not consume any data!");
+                }
+
+                self.target.write_all(&self.outbuf).await?;
+                self.buf.consume(read);
+                self.outbuf.clear();
             }
 
-            self.byte_count += size;
-            entry.compressed_size = size.try_into()?;
-            entry.uncompressed_size = size.try_into()?;
+            let old_read = deflate_encoder.total_in();
+            let old_write = deflate_encoder.total_out();
+            deflate_encoder.compress(
+                &self.buf,
+                &mut self.outbuf.get_free_mut_slice(),
+                FlushCompress::Finish,
+            )?;
+            let read = (deflate_encoder.total_in() - old_read) as usize;
+            let write = (deflate_encoder.total_out() - old_write) as usize;
+
+            self.outbuf.add_size(write);
+
+            if read != self.buf.len() {
+                bail!("deflate did not use all input bytes!");
+            }
+
+            self.target.write_all(&self.outbuf).await?;
+            self.buf.clear();
+            self.outbuf.clear();
+
+            self.byte_count += deflate_encoder.total_out() as usize;
+            entry.compressed_size = deflate_encoder.total_out();
+            entry.uncompressed_size = deflate_encoder.total_in();
+
             entry.crc32 = hasher.finalize();
         }
         self.byte_count += entry.write_data_descriptor(&mut self.target).await?;
-- 
2.20.1





  parent reply	other threads:[~2021-03-15 11:21 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-15 11:21 [pbs-devel] [PATCH proxmox-backup 1/3] tools/zip: add missing start_disk field for zip64 extension Dominik Csapak
2021-03-15 11:21 ` [pbs-devel] [RFC PATCH proxmox-backup 2/3] tools/zip: only add zip64 field when necessary Dominik Csapak
2021-03-16  8:13   ` [pbs-devel] applied: " Dietmar Maurer
2021-03-15 11:21 ` Dominik Csapak [this message]
2021-03-16  8:14   ` [pbs-devel] [RFC PATCH proxmox-backup 3/3] tools/zip: compress zips with deflate Dietmar Maurer
2021-03-15 12:02 ` [pbs-devel] applied: [PATCH proxmox-backup 1/3] tools/zip: add missing start_disk field for zip64 extension Thomas Lamprecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210315112118.13641-3-d.csapak@proxmox.com \
    --to=d.csapak@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal