From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 5A6CD615F3 for ; Wed, 21 Oct 2020 09:29:13 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 070FF15353 for ; Wed, 21 Oct 2020 09:29:13 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 5DEFD152D7 for ; Wed, 21 Oct 2020 09:29:10 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 25FDB45E8B for ; Wed, 21 Oct 2020 09:29:10 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Wed, 21 Oct 2020 09:29:06 +0200 Message-Id: <20201021072908.10516-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.484 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[cachefly.net, zip.rs, tools.rs, foo.zip] Subject: [pbs-devel] [PATCH proxmox-backup v3 1/3] tools: add zip module X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Oct 2020 07:29:13 -0000 This module contains the 'ZipEncoder' struct, which wraps an async writer, to create a ZIP archive on the fly. To create a ZIP file, have a target that implements AsyncWrite, give it to ZipEncoder::new, add entries via 'add_entry' and at the end, call 'finish'. For now, this does not implement compression (uses ZIP's STORE mode), and does not support empty directories or hardlinks (or any other special files). Signed-off-by: Dominik Csapak --- changes from v2: * use ByteBuffer instead of Vec and read_buf (also increase buf size to 1M) src/tools.rs | 1 + src/tools/zip.rs | 518 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 519 insertions(+) create mode 100644 src/tools/zip.rs diff --git a/src/tools.rs b/src/tools.rs index 1837c0e0..5a9f020a 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -36,6 +36,7 @@ pub mod logrotate; pub mod loopdev; pub mod fuse_loop; pub mod socket; +pub mod zip; mod parallel_handler; pub use parallel_handler::*; diff --git a/src/tools/zip.rs b/src/tools/zip.rs new file mode 100644 index 00000000..3248239f --- /dev/null +++ b/src/tools/zip.rs @@ -0,0 +1,518 @@ +//! ZIP Helper +//! +//! Provides an interface to create a ZIP File from ZipEntries +//! for a more detailed description of the ZIP format, see: +//! 
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +use std::convert::TryInto; +use std::ffi::OsString; +use std::io; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; + +use anyhow::{Error, Result}; +use endian_trait::Endian; +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; + +use crc32fast::Hasher; +use proxmox::tools::time::gmtime; +use proxmox::tools::byte_buffer::ByteBuffer; + +const LOCAL_FH_SIG: u32 = 0x04034B50; +const LOCAL_FF_SIG: u32 = 0x08074B50; +const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; +const END_OF_CENTRAL_DIR: u32 = 0x06054B50; +const VERSION_NEEDED: u16 = 0x002d; +const VERSION_MADE_BY: u16 = 0x032d; + +const ZIP64_EOCD_RECORD: u32 = 0x06064B50; +const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; + +// bits for time: +// 0-4: day of the month (1-31) +// 5-8: month: (1 = jan, etc.) +// 9-15: year offset from 1980 +// +// bits for date: +// 0-4: second / 2 +// 5-10: minute (0-59) +// 11-15: hour (0-23) +// +// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime +fn epoch_to_dos(epoch: i64) -> (u16, u16) { + let gmtime = match gmtime(epoch) { + Ok(gmtime) => gmtime, + Err(_) => return (0, 0), + }; + + let seconds = (gmtime.tm_sec / 2) & 0b11111; + let minutes = gmtime.tm_min & 0xb111111; + let hours = gmtime.tm_hour & 0b11111; + let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16; + + let date: u16 = if gmtime.tm_year > (2108 - 1900) || gmtime.tm_year < (1980 - 1900) { + 0 + } else { + let day = gmtime.tm_mday & 0b11111; + let month = (gmtime.tm_mon + 1) & 0b1111; + let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; + ((year << 9) | (month << 5) | (day)) as u16 + }; + + (date, time) +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64Field { + field_type: u16, + field_size: u16, + uncompressed_size: u64, + compressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64FieldWithOffset { + 
field_type: u16, + field_size: u16, + uncompressed_size: u64, + compressed_size: u64, + offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileHeader { + signature: u32, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileFooter { + signature: u32, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct CentralDirectoryFileHeader { + signature: u32, + version_made_by: u16, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, + comment_len: u16, + start_disk: u16, + internal_flags: u16, + external_flags: u32, + offset: u32, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct EndOfCentralDir { + signature: u32, + disk_number: u16, + start_disk: u16, + disk_record_count: u16, + total_record_count: u16, + directory_size: u32, + directory_offset: u32, + comment_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDRecord { + signature: u32, + field_size: u64, + version_made_by: u16, + version_needed: u16, + disk_number: u32, + disk_number_central_dir: u32, + disk_record_count: u64, + total_record_count: u64, + directory_size: u64, + directory_offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDLocator { + signature: u32, + disk_number: u32, + offset: u64, + disk_count: u32, +} + +async fn write_struct(output: &mut T, data: E) -> io::Result<()> +where + T: AsyncWrite + ?Sized + Unpin, + E: Endian, +{ + let data = data.to_le(); + + let data = unsafe { + std::slice::from_raw_parts( + &data as *const E as *const u8, + core::mem::size_of_val(&data), + ) + }; + output.write_all(data).await +} + +/// Represents an Entry in a 
ZIP File +/// +/// used to add to a ZipEncoder +pub struct ZipEntry { + filename: OsString, + mtime: i64, + mode: u16, + crc32: u32, + uncompressed_size: u64, + compressed_size: u64, + offset: u64, + is_file: bool, +} + +impl ZipEntry { + /// Creates a new ZipEntry + /// + /// if is_file is false the path will contain an trailing separator, + /// so that the zip file understands that it is a directory + pub fn new>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self { + let mut relpath = PathBuf::new(); + + for comp in path.as_ref().components() { + if let Component::Normal(_) = comp { + relpath.push(comp); + } + } + + if !is_file { + relpath.push(""); // adds trailing slash + } + + Self { + filename: relpath.into(), + crc32: 0, + mtime, + mode, + uncompressed_size: 0, + compressed_size: 0, + offset: 0, + is_file, + } + } + + async fn write_local_header(&self, mut buf: &mut W) -> io::Result + where + W: AsyncWrite + Unpin + ?Sized, + { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + LocalFileHeader { + signature: LOCAL_FH_SIG, + version_needed: 0x2d, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: 0, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64Field { + field_type: 0x0001, + field_size: 2 * 8, + uncompressed_size: 0, + compressed_size: 0, + }, + ) + .await?; + + Ok(size) + } + + async fn write_data_descriptor( + &self, + mut buf: &mut W, + ) -> io::Result { + let size = size_of::(); + + write_struct( + &mut buf, + LocalFileFooter { + signature: LOCAL_FF_SIG, + crc32: self.crc32, + compressed_size: self.compressed_size, 
+ uncompressed_size: self.uncompressed_size, + }, + ) + .await?; + + Ok(size) + } + + async fn write_central_directory_header( + &self, + mut buf: &mut W, + ) -> io::Result { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + CentralDirectoryFileHeader { + signature: CENTRAL_DIRECTORY_FH_SIG, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: self.crc32, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + comment_len: 0, + start_disk: 0, + internal_flags: 0, + external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4, + offset: 0xFFFFFFFF, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64FieldWithOffset { + field_type: 1, + field_size: 3 * 8, + uncompressed_size: self.uncompressed_size, + compressed_size: self.compressed_size, + offset: self.offset, + }, + ) + .await?; + + Ok(size) + } +} + +/// Wraps a writer that implements AsyncWrite for creating a ZIP archive +/// +/// This will create a ZIP archive on the fly with files added with +/// 'add_entry'. To Finish the file, call 'finish' +/// Example: +/// ```no_run +/// use proxmox_backup::tools::zip::*; +/// use tokio::fs::File; +/// +/// #[tokio::async] +/// async fn main() -> std::io::Result<()> { +/// let target = File::open("foo.zip").await?; +/// let mut source = File::open("foo.txt").await?; +/// +/// let mut zip = ZipEncoder::new(target); +/// zip.add_entry(ZipEntry { +/// "foo.txt", +/// 0, +/// 0o100755, +/// true, +/// }, source).await?; +/// +/// zip.finish().await? 
+/// +/// Ok(()) +/// } +/// ``` +pub struct ZipEncoder +where + W: AsyncWrite + Unpin, +{ + byte_count: usize, + files: Vec, + target: W, + buf: ByteBuffer, +} + +impl ZipEncoder { + pub fn new(target: W) -> Self { + Self { + byte_count: 0, + files: Vec::new(), + target, + buf: ByteBuffer::with_capacity(1024*1024), + } + } + + pub async fn add_entry( + &mut self, + mut entry: ZipEntry, + content: Option, + ) -> Result<(), Error> { + entry.offset = self.byte_count.try_into()?; + self.byte_count += entry.write_local_header(&mut self.target).await?; + if let Some(mut content) = content { + let mut hasher = Hasher::new(); + let mut size = 0; + loop { + + let count = self.buf.read_from_async(&mut content).await?; + + // end of file + if count == 0 { + break; + } + + size += count; + hasher.update(&self.buf); + self.target.write_all(&self.buf).await?; + self.buf.consume(count); + } + + self.byte_count += size; + entry.compressed_size = size.try_into()?; + entry.uncompressed_size = size.try_into()?; + entry.crc32 = hasher.finalize(); + } + self.byte_count += entry.write_data_descriptor(&mut self.target).await?; + + self.files.push(entry); + + Ok(()) + } + + async fn write_eocd( + &mut self, + central_dir_size: usize, + central_dir_offset: usize, + ) -> Result<(), Error> { + let entrycount = self.files.len(); + + let mut count = entrycount as u16; + let mut directory_size = central_dir_size as u32; + let mut directory_offset = central_dir_offset as u32; + + if central_dir_size > u32::MAX as usize + || central_dir_offset > u32::MAX as usize + || entrycount > u16::MAX as usize + { + count = 0xFFFF; + directory_size = 0xFFFFFFFF; + directory_offset = 0xFFFFFFFF; + + write_struct( + &mut self.target, + Zip64EOCDRecord { + signature: ZIP64_EOCD_RECORD, + field_size: 44, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + disk_number: 0, + disk_number_central_dir: 0, + disk_record_count: entrycount.try_into()?, + total_record_count: entrycount.try_into()?, + 
directory_size: central_dir_size.try_into()?, + directory_offset: central_dir_offset.try_into()?, + }, + ) + .await?; + + let locator_offset = central_dir_offset + central_dir_size; + + write_struct( + &mut self.target, + Zip64EOCDLocator { + signature: ZIP64_EOCD_LOCATOR, + disk_number: 0, + offset: locator_offset.try_into()?, + disk_count: 1, + }, + ) + .await?; + } + + write_struct( + &mut self.target, + EndOfCentralDir { + signature: END_OF_CENTRAL_DIR, + disk_number: 0, + start_disk: 0, + disk_record_count: count, + total_record_count: count, + directory_size, + directory_offset, + comment_len: 0, + }, + ) + .await?; + + Ok(()) + } + + pub async fn finish(&mut self) -> Result<(), Error> { + let central_dir_offset = self.byte_count; + let mut central_dir_size = 0; + + for file in &self.files { + central_dir_size += file + .write_central_directory_header(&mut self.target) + .await?; + } + + self.write_eocd(central_dir_size, central_dir_offset) + .await?; + + self.target.flush().await?; + + Ok(()) + } +} -- 2.20.1