From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 632646B39F for ; Tue, 16 Mar 2021 13:37:59 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 6028B2DFDE for ; Tue, 16 Mar 2021 13:37:29 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id B341E2DFD5 for ; Tue, 16 Mar 2021 13:37:28 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 7E471458DB for ; Tue, 16 Mar 2021 13:37:28 +0100 (CET) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Tue, 16 Mar 2021 13:37:26 +0100 Message-Id: <20210316123727.4568-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.186 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [zip.rs] Subject: [pbs-devel] [PATCH proxmox-backup v2 1/2] tools/zip: compress zips with deflate X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 16 Mar 2021 12:37:59 -0000 to get smaller zip files Signed-off-by: Dominik Csapak --- changes from v1: * factor out the compression call and use block_in_place Cargo.toml | 1 + src/tools/zip.rs | 74 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 79945312..06967c20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ crc32fast = "1" endian_trait = { version = "0.6", features = ["arrays"] } anyhow = "1.0" futures = "0.3" +flate2 = "1.0" h2 = { version = "0.3", features = [ "stream" ] } handlebars = "3.0" http = "0.2" diff --git a/src/tools/zip.rs b/src/tools/zip.rs index 55f2a24a..d7a09d1c 100644 --- a/src/tools/zip.rs +++ b/src/tools/zip.rs @@ -11,9 +11,10 @@ use std::mem::size_of; use std::os::unix::ffi::OsStrExt; use std::path::{Component, Path, PathBuf}; -use anyhow::{Error, Result}; +use anyhow::{bail, Error, Result}; use endian_trait::Endian; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; +use flate2::{Compress, Compression, FlushCompress}; use crc32fast::Hasher; use proxmox::tools::time::gmtime; @@ -245,7 +246,7 @@ impl ZipEntry { signature: LOCAL_FH_SIG, version_needed: 0x2d, flags: 1 << 3, - compression: 0, + compression: 0x8, time, date, crc32: 0, @@ -328,7 +329,7 @@ impl ZipEntry { version_made_by: VERSION_MADE_BY, version_needed: VERSION_NEEDED, flags: 1 << 3, - compression: 0, + compression: 0x8, time, date, crc32: self.crc32, @@ -402,6 +403,7 @@ where files: Vec, target: W, buf: ByteBuffer, + outbuf: ByteBuffer, } impl ZipEncoder { @@ -410,10 +412,24 @@ impl ZipEncoder { byte_count: 0, files: Vec::new(), target, - buf: ByteBuffer::with_capacity(1024*1024), + buf: ByteBuffer::with_capacity(1024 * 1024), + outbuf: ByteBuffer::with_capacity(1024 * 1024), } } + fn compress(&mut self, encoder: &mut Compress, mode: FlushCompress) -> Result { + let old_read = encoder.total_in(); + let old_write = encoder.total_out(); + crate::tools::runtime::block_in_place(|| { + encoder.compress(&self.buf, &mut self.outbuf.get_free_mut_slice(), mode) + })?; + let read = (encoder.total_in() - old_read) as usize; + let write = (encoder.total_out() - old_write) as usize; + + self.outbuf.add_size(write); + Ok(read) + } + pub async fn add_entry( &mut self, mut entry: ZipEntry, @@ -423,25 +439,47 @@ impl ZipEncoder { self.byte_count += entry.write_local_header(&mut self.target).await?; if let Some(mut content) = content { let mut hasher = Hasher::new(); - let mut size = 0; - loop { + let mut deflate_encoder = Compress::new(Compression::fast(), false); - let count = self.buf.read_from_async(&mut content).await?; - - // end of file - if count == 0 { - break; + loop { + let syncmode = if self.buf.is_full() { + FlushCompress::Sync + } else { + let old_pos = self.buf.len(); + let count = self.buf.read_from_async(&mut content).await?; + // end of file + if count == 0 { + break; + } + + hasher.update(&self.buf[old_pos..]); + FlushCompress::None + }; + + let read = self.compress(&mut deflate_encoder, syncmode)?; + + if read == 0 { + bail!("did not consume any data!"); } - size += count; - hasher.update(&self.buf); - self.target.write_all(&self.buf).await?; - self.buf.consume(count); + self.target.write_all(&self.outbuf).await?; + self.buf.consume(read); + self.outbuf.clear(); } - self.byte_count += size; - entry.compressed_size = size.try_into()?; - entry.uncompressed_size = size.try_into()?; + let read = self.compress(&mut deflate_encoder, FlushCompress::Finish)?; + if read != self.buf.len() { + bail!("deflate did not use all input bytes!"); + } + + self.target.write_all(&self.outbuf).await?; + self.buf.clear(); + self.outbuf.clear(); + + self.byte_count += deflate_encoder.total_out() as usize; + entry.compressed_size = deflate_encoder.total_out(); + entry.uncompressed_size = deflate_encoder.total_in(); + entry.crc32 = hasher.finalize(); } self.byte_count += entry.write_data_descriptor(&mut self.target).await?; -- 2.20.1