From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 3CBB4615FA for ; Wed, 21 Oct 2020 10:52:13 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2B18A15FAF for ; Wed, 21 Oct 2020 10:51:43 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id A8DF515FA4 for ; Wed, 21 Oct 2020 10:51:41 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 76E0045E98 for ; Wed, 21 Oct 2020 10:51:41 +0200 (CEST) Date: Wed, 21 Oct 2020 10:51:39 +0200 From: Wolfgang Bumiller To: Dominik Csapak Cc: pbs-devel@lists.proxmox.com Message-ID: <20201021085139.fvj6r6wrqbigk3x2@olga.proxmox.com> References: <20201021072908.10516-1-d.csapak@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20201021072908.10516-1-d.csapak@proxmox.com> User-Agent: NeoMutt/20180716 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.014 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. 
See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [foo.zip, tools.rs, zip.rs, cachefly.net] Subject: [pbs-devel] applied series: [PATCH proxmox-backup v3 1/3] tools: add zip module X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Oct 2020 08:52:13 -0000 applied series with minor fixup commits On Wed, Oct 21, 2020 at 09:29:06AM +0200, Dominik Csapak wrote: > This module contains the 'ZipEncoder' struct, which wraps an async writer, > to create a ZIP archive on the fly > > To create a ZIP file, have a target that implements AsyncWrite, > give it to ZipEncoder::new, add entries via 'add_entry' and > at the end, call 'finish' > > For now, this does not implement compression (uses ZIP's STORE mode), and > does not support empty directories or hardlinks (or any other special > files) > > Signed-off-by: Dominik Csapak > --- > changes from v2: > * use ByteBuffer instead of Vec and read_buf (also increase buf size to 1M) > > src/tools.rs | 1 + > src/tools/zip.rs | 518 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 519 insertions(+) > create mode 100644 src/tools/zip.rs > > diff --git a/src/tools.rs b/src/tools.rs > index 1837c0e0..5a9f020a 100644 > --- a/src/tools.rs > +++ b/src/tools.rs > @@ -36,6 +36,7 @@ pub mod logrotate; > pub mod loopdev; > pub mod fuse_loop; > pub mod socket; > +pub mod zip; > > mod parallel_handler; > pub use parallel_handler::*; > diff --git a/src/tools/zip.rs b/src/tools/zip.rs > new file mode 100644 > index 00000000..3248239f > --- /dev/null > +++ b/src/tools/zip.rs > @@ -0,0 +1,518 @@ > +//! ZIP Helper > +//! > +//! Provides an interface to create a ZIP File from ZipEntries > +//! for a more detailed description of the ZIP format, see: > +//! 
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT > + > +use std::convert::TryInto; > +use std::ffi::OsString; > +use std::io; > +use std::mem::size_of; > +use std::os::unix::ffi::OsStrExt; > +use std::path::{Component, Path, PathBuf}; > + > +use anyhow::{Error, Result}; > +use endian_trait::Endian; > +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; > + > +use crc32fast::Hasher; > +use proxmox::tools::time::gmtime; > +use proxmox::tools::byte_buffer::ByteBuffer; > + > +const LOCAL_FH_SIG: u32 = 0x04034B50; > +const LOCAL_FF_SIG: u32 = 0x08074B50; > +const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; > +const END_OF_CENTRAL_DIR: u32 = 0x06054B50; > +const VERSION_NEEDED: u16 = 0x002d; > +const VERSION_MADE_BY: u16 = 0x032d; > + > +const ZIP64_EOCD_RECORD: u32 = 0x06064B50; > +const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; > + > +// bits for date: > +// 0-4: day of the month (1-31) > +// 5-8: month: (1 = jan, etc.) > +// 9-15: year offset from 1980 > +// > +// bits for time: > +// 0-4: second / 2 > +// 5-10: minute (0-59) > +// 11-15: hour (0-23) > +// > +// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime > +fn epoch_to_dos(epoch: i64) -> (u16, u16) { > + let gmtime = match gmtime(epoch) { > + Ok(gmtime) => gmtime, > + Err(_) => return (0, 0), > + }; > + > + let seconds = (gmtime.tm_sec / 2) & 0b11111; > + let minutes = gmtime.tm_min & 0xb111111; > + let hours = gmtime.tm_hour & 0b11111; > + let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16; > + > + let date: u16 = if gmtime.tm_year > (2108 - 1900) || gmtime.tm_year < (1980 - 1900) { > + 0 > + } else { > + let day = gmtime.tm_mday & 0b11111; > + let month = (gmtime.tm_mon + 1) & 0b1111; > + let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; > + ((year << 9) | (month << 5) | (day)) as u16 > + }; > + > + (date, time) > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct Zip64Field { > + field_type: u16, > + field_size: u16, > + 
uncompressed_size: u64, > + compressed_size: u64, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct Zip64FieldWithOffset { > + field_type: u16, > + field_size: u16, > + uncompressed_size: u64, > + compressed_size: u64, > + offset: u64, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct LocalFileHeader { > + signature: u32, > + version_needed: u16, > + flags: u16, > + compression: u16, > + time: u16, > + date: u16, > + crc32: u32, > + compressed_size: u32, > + uncompressed_size: u32, > + filename_len: u16, > + extra_field_len: u16, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct LocalFileFooter { > + signature: u32, > + crc32: u32, > + compressed_size: u64, > + uncompressed_size: u64, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct CentralDirectoryFileHeader { > + signature: u32, > + version_made_by: u16, > + version_needed: u16, > + flags: u16, > + compression: u16, > + time: u16, > + date: u16, > + crc32: u32, > + compressed_size: u32, > + uncompressed_size: u32, > + filename_len: u16, > + extra_field_len: u16, > + comment_len: u16, > + start_disk: u16, > + internal_flags: u16, > + external_flags: u32, > + offset: u32, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct EndOfCentralDir { > + signature: u32, > + disk_number: u16, > + start_disk: u16, > + disk_record_count: u16, > + total_record_count: u16, > + directory_size: u32, > + directory_offset: u32, > + comment_len: u16, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct Zip64EOCDRecord { > + signature: u32, > + field_size: u64, > + version_made_by: u16, > + version_needed: u16, > + disk_number: u32, > + disk_number_central_dir: u32, > + disk_record_count: u64, > + total_record_count: u64, > + directory_size: u64, > + directory_offset: u64, > +} > + > +#[derive(Endian)] > +#[repr(C, packed)] > +struct Zip64EOCDLocator { > + signature: u32, > + disk_number: u32, > + offset: u64, > + disk_count: u32, > +} > + > +async fn 
write_struct(output: &mut T, data: E) -> io::Result<()> > +where > + T: AsyncWrite + ?Sized + Unpin, > + E: Endian, > +{ > + let data = data.to_le(); > + > + let data = unsafe { > + std::slice::from_raw_parts( > + &data as *const E as *const u8, > + core::mem::size_of_val(&data), > + ) > + }; > + output.write_all(data).await > +} > + > +/// Represents an Entry in a ZIP File > +/// > +/// used to add to a ZipEncoder > +pub struct ZipEntry { > + filename: OsString, > + mtime: i64, > + mode: u16, > + crc32: u32, > + uncompressed_size: u64, > + compressed_size: u64, > + offset: u64, > + is_file: bool, > +} > + > +impl ZipEntry { > + /// Creates a new ZipEntry > + /// > + /// if is_file is false the path will contain an trailing separator, > + /// so that the zip file understands that it is a directory > + pub fn new>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self { > + let mut relpath = PathBuf::new(); > + > + for comp in path.as_ref().components() { > + if let Component::Normal(_) = comp { > + relpath.push(comp); > + } > + } > + > + if !is_file { > + relpath.push(""); // adds trailing slash > + } > + > + Self { > + filename: relpath.into(), > + crc32: 0, > + mtime, > + mode, > + uncompressed_size: 0, > + compressed_size: 0, > + offset: 0, > + is_file, > + } > + } > + > + async fn write_local_header(&self, mut buf: &mut W) -> io::Result > + where > + W: AsyncWrite + Unpin + ?Sized, > + { > + let filename = self.filename.as_bytes(); > + let filename_len = filename.len(); > + let header_size = size_of::(); > + let zip_field_size = size_of::(); > + let size: usize = header_size + filename_len + zip_field_size; > + > + let (date, time) = epoch_to_dos(self.mtime); > + > + write_struct( > + &mut buf, > + LocalFileHeader { > + signature: LOCAL_FH_SIG, > + version_needed: 0x2d, > + flags: 1 << 3, > + compression: 0, > + time, > + date, > + crc32: 0, > + compressed_size: 0xFFFFFFFF, > + uncompressed_size: 0xFFFFFFFF, > + filename_len: filename_len as u16, > + 
extra_field_len: zip_field_size as u16, > + }, > + ) > + .await?; > + > + buf.write_all(filename).await?; > + > + write_struct( > + &mut buf, > + Zip64Field { > + field_type: 0x0001, > + field_size: 2 * 8, > + uncompressed_size: 0, > + compressed_size: 0, > + }, > + ) > + .await?; > + > + Ok(size) > + } > + > + async fn write_data_descriptor( > + &self, > + mut buf: &mut W, > + ) -> io::Result { > + let size = size_of::(); > + > + write_struct( > + &mut buf, > + LocalFileFooter { > + signature: LOCAL_FF_SIG, > + crc32: self.crc32, > + compressed_size: self.compressed_size, > + uncompressed_size: self.uncompressed_size, > + }, > + ) > + .await?; > + > + Ok(size) > + } > + > + async fn write_central_directory_header( > + &self, > + mut buf: &mut W, > + ) -> io::Result { > + let filename = self.filename.as_bytes(); > + let filename_len = filename.len(); > + let header_size = size_of::(); > + let zip_field_size = size_of::(); > + let size: usize = header_size + filename_len + zip_field_size; > + > + let (date, time) = epoch_to_dos(self.mtime); > + > + write_struct( > + &mut buf, > + CentralDirectoryFileHeader { > + signature: CENTRAL_DIRECTORY_FH_SIG, > + version_made_by: VERSION_MADE_BY, > + version_needed: VERSION_NEEDED, > + flags: 1 << 3, > + compression: 0, > + time, > + date, > + crc32: self.crc32, > + compressed_size: 0xFFFFFFFF, > + uncompressed_size: 0xFFFFFFFF, > + filename_len: filename_len as u16, > + extra_field_len: zip_field_size as u16, > + comment_len: 0, > + start_disk: 0, > + internal_flags: 0, > + external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4, > + offset: 0xFFFFFFFF, > + }, > + ) > + .await?; > + > + buf.write_all(filename).await?; > + > + write_struct( > + &mut buf, > + Zip64FieldWithOffset { > + field_type: 1, > + field_size: 3 * 8, > + uncompressed_size: self.uncompressed_size, > + compressed_size: self.compressed_size, > + offset: self.offset, > + }, > + ) > + .await?; > + > + Ok(size) > + } > +} > + > +/// Wraps a 
writer that implements AsyncWrite for creating a ZIP archive > +/// > +/// This will create a ZIP archive on the fly with files added with > +/// 'add_entry'. To Finish the file, call 'finish' > +/// Example: > +/// ```no_run > +/// use proxmox_backup::tools::zip::*; > +/// use tokio::fs::File; > +/// > +/// #[tokio::async] > +/// async fn main() -> std::io::Result<()> { > +/// let target = File::open("foo.zip").await?; > +/// let mut source = File::open("foo.txt").await?; > +/// > +/// let mut zip = ZipEncoder::new(target); > +/// zip.add_entry(ZipEntry { > +/// "foo.txt", > +/// 0, > +/// 0o100755, > +/// true, > +/// }, source).await?; > +/// > +/// zip.finish().await? > +/// > +/// Ok(()) > +/// } > +/// ``` > +pub struct ZipEncoder > +where > + W: AsyncWrite + Unpin, > +{ > + byte_count: usize, > + files: Vec, > + target: W, > + buf: ByteBuffer, > +} > + > +impl ZipEncoder { > + pub fn new(target: W) -> Self { > + Self { > + byte_count: 0, > + files: Vec::new(), > + target, > + buf: ByteBuffer::with_capacity(1024*1024), > + } > + } > + > + pub async fn add_entry( > + &mut self, > + mut entry: ZipEntry, > + content: Option, > + ) -> Result<(), Error> { > + entry.offset = self.byte_count.try_into()?; > + self.byte_count += entry.write_local_header(&mut self.target).await?; > + if let Some(mut content) = content { > + let mut hasher = Hasher::new(); > + let mut size = 0; > + loop { > + > + let count = self.buf.read_from_async(&mut content).await?; > + > + // end of file > + if count == 0 { > + break; > + } > + > + size += count; > + hasher.update(&self.buf); > + self.target.write_all(&self.buf).await?; > + self.buf.consume(count); > + } > + > + self.byte_count += size; > + entry.compressed_size = size.try_into()?; > + entry.uncompressed_size = size.try_into()?; > + entry.crc32 = hasher.finalize(); > + } > + self.byte_count += entry.write_data_descriptor(&mut self.target).await?; > + > + self.files.push(entry); > + > + Ok(()) > + } > + > + async fn write_eocd( > + 
&mut self, > + central_dir_size: usize, > + central_dir_offset: usize, > + ) -> Result<(), Error> { > + let entrycount = self.files.len(); > + > + let mut count = entrycount as u16; > + let mut directory_size = central_dir_size as u32; > + let mut directory_offset = central_dir_offset as u32; > + > + if central_dir_size > u32::MAX as usize > + || central_dir_offset > u32::MAX as usize > + || entrycount > u16::MAX as usize > + { > + count = 0xFFFF; > + directory_size = 0xFFFFFFFF; > + directory_offset = 0xFFFFFFFF; > + > + write_struct( > + &mut self.target, > + Zip64EOCDRecord { > + signature: ZIP64_EOCD_RECORD, > + field_size: 44, > + version_made_by: VERSION_MADE_BY, > + version_needed: VERSION_NEEDED, > + disk_number: 0, > + disk_number_central_dir: 0, > + disk_record_count: entrycount.try_into()?, > + total_record_count: entrycount.try_into()?, > + directory_size: central_dir_size.try_into()?, > + directory_offset: central_dir_offset.try_into()?, > + }, > + ) > + .await?; > + > + let locator_offset = central_dir_offset + central_dir_size; > + > + write_struct( > + &mut self.target, > + Zip64EOCDLocator { > + signature: ZIP64_EOCD_LOCATOR, > + disk_number: 0, > + offset: locator_offset.try_into()?, > + disk_count: 1, > + }, > + ) > + .await?; > + } > + > + write_struct( > + &mut self.target, > + EndOfCentralDir { > + signature: END_OF_CENTRAL_DIR, > + disk_number: 0, > + start_disk: 0, > + disk_record_count: count, > + total_record_count: count, > + directory_size, > + directory_offset, > + comment_len: 0, > + }, > + ) > + .await?; > + > + Ok(()) > + } > + > + pub async fn finish(&mut self) -> Result<(), Error> { > + let central_dir_offset = self.byte_count; > + let mut central_dir_size = 0; > + > + for file in &self.files { > + central_dir_size += file > + .write_central_directory_header(&mut self.target) > + .await?; > + } > + > + self.write_eocd(central_dir_size, central_dir_offset) > + .await?; > + > + self.target.flush().await?; > + > + Ok(()) > + } > +} > 
-- > 2.20.1