From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id EDBDA6140B for ; Tue, 20 Oct 2020 16:45:41 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E0519F8B0 for ; Tue, 20 Oct 2020 16:45:06 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 05C78F8A1 for ; Tue, 20 Oct 2020 16:45:05 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id C933F45E5B for ; Tue, 20 Oct 2020 16:45:04 +0200 (CEST) From: Dominik Csapak To: pbs-devel@lists.proxmox.com Date: Tue, 20 Oct 2020 16:45:00 +0200 Message-Id: <20201020144502.13725-1-d.csapak@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.490 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[tools.rs, cachefly.net, zip.rs, foo.zip] Subject: [pbs-devel] [PATCH proxmox-backup v2 1/3] tools: add zip module X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 20 Oct 2020 14:45:42 -0000 This module contains the 'ZipEncoder' struct, which wraps an async writer, to create a ZIP archive on the fly. To create a ZIP file, have a target that implements AsyncWrite, give it to ZipEncoder::new, add entries via 'add_entry' and at the end, call 'finish'. For now, this does not implement compression (uses ZIP's STORE mode), and does not support empty directories or hardlinks (or any other special files) Signed-off-by: Dominik Csapak --- changes from v1: * completely revamped the code and interface, it now uses 'add_entry' to add files, and an upstream async writer instead of implementing stream * incorporate suggested changes from Wolfgang, namely using Endian,etc. to write structs as byte array * can now also handle directories src/tools.rs | 1 + src/tools/zip.rs | 516 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 517 insertions(+) create mode 100644 src/tools/zip.rs diff --git a/src/tools.rs b/src/tools.rs index 1837c0e0..5a9f020a 100644 --- a/src/tools.rs +++ b/src/tools.rs @@ -36,6 +36,7 @@ pub mod logrotate; pub mod loopdev; pub mod fuse_loop; pub mod socket; +pub mod zip; mod parallel_handler; pub use parallel_handler::*; diff --git a/src/tools/zip.rs b/src/tools/zip.rs new file mode 100644 index 00000000..8745bff3 --- /dev/null +++ b/src/tools/zip.rs @@ -0,0 +1,516 @@ +//! ZIP Helper +//! +//! Provides an interface to create a ZIP File from ZipEntries +//! for a more detailed description of the ZIP format, see: +//!
https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT + +use std::convert::TryInto; +use std::ffi::OsString; +use std::io; +use std::mem::size_of; +use std::os::unix::ffi::OsStrExt; +use std::path::{Component, Path, PathBuf}; + +use anyhow::{Error, Result}; +use endian_trait::Endian; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; + +use crc32fast::Hasher; +use proxmox::tools::time::gmtime; + +const LOCAL_FH_SIG: u32 = 0x04034B50; +const LOCAL_FF_SIG: u32 = 0x08074B50; +const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; +const END_OF_CENTRAL_DIR: u32 = 0x06054B50; +const VERSION_NEEDED: u16 = 0x002d; +const VERSION_MADE_BY: u16 = 0x032d; + +const ZIP64_EOCD_RECORD: u32 = 0x06064B50; +const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; + +// bits for time: +// 0-4: day of the month (1-31) +// 5-8: month: (1 = jan, etc.) +// 9-15: year offset from 1980 +// +// bits for date: +// 0-4: second / 2 +// 5-10: minute (0-59) +// 11-15: hour (0-23) +// +// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime +fn epoch_to_dos(epoch: i64) -> (u16, u16) { + let gmtime = match gmtime(epoch) { + Ok(gmtime) => gmtime, + Err(_) => return (0, 0), + }; + + let seconds = (gmtime.tm_sec / 2) & 0b11111; + let minutes = gmtime.tm_min & 0xb111111; + let hours = gmtime.tm_hour & 0b11111; + let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16; + + let date: u16 = if gmtime.tm_year > (2108 - 1900) || gmtime.tm_year < (1980 - 1900) { + 0 + } else { + let day = gmtime.tm_mday & 0b11111; + let month = (gmtime.tm_mon + 1) & 0b1111; + let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; + ((year << 9) | (month << 5) | (day)) as u16 + }; + + (date, time) +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64Field { + field_type: u16, + field_size: u16, + uncompressed_size: u64, + compressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64FieldWithOffset { + field_type: u16, + field_size: u16, + 
uncompressed_size: u64, + compressed_size: u64, + offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileHeader { + signature: u32, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct LocalFileFooter { + signature: u32, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct CentralDirectoryFileHeader { + signature: u32, + version_made_by: u16, + version_needed: u16, + flags: u16, + compression: u16, + time: u16, + date: u16, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, + filename_len: u16, + extra_field_len: u16, + comment_len: u16, + start_disk: u16, + internal_flags: u16, + external_flags: u32, + offset: u32, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct EndOfCentralDir { + signature: u32, + disk_number: u16, + start_disk: u16, + disk_record_count: u16, + total_record_count: u16, + directory_size: u32, + directory_offset: u32, + comment_len: u16, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDRecord { + signature: u32, + field_size: u64, + version_made_by: u16, + version_needed: u16, + disk_number: u32, + disk_number_central_dir: u32, + disk_record_count: u64, + total_record_count: u64, + directory_size: u64, + directory_offset: u64, +} + +#[derive(Endian)] +#[repr(C, packed)] +struct Zip64EOCDLocator { + signature: u32, + disk_number: u32, + offset: u64, + disk_count: u32, +} + +async fn write_struct(output: &mut T, data: E) -> io::Result<()> +where + T: AsyncWrite + ?Sized + Unpin, + E: Endian, +{ + let data = data.to_le(); + + let data = unsafe { + std::slice::from_raw_parts( + &data as *const E as *const u8, + core::mem::size_of_val(&data), + ) + }; + output.write_all(data).await +} + +/// Represents an Entry in a ZIP File +/// +/// used to add to a 
ZipEncoder +pub struct ZipEntry { + filename: OsString, + mtime: i64, + mode: u16, + crc32: u32, + uncompressed_size: u64, + compressed_size: u64, + offset: u64, + is_file: bool, +} + +impl ZipEntry { + /// Creates a new ZipEntry + /// + /// if is_file is false the path will contain an trailing separator, + /// so that the zip file understands that it is a directory + pub fn new>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self { + let mut relpath = PathBuf::new(); + + for comp in path.as_ref().components() { + if let Component::Normal(_) = comp { + relpath.push(comp); + } + } + + if !is_file { + relpath.push(""); // adds trailing slash + } + + Self { + filename: relpath.into(), + crc32: 0, + mtime, + mode, + uncompressed_size: 0, + compressed_size: 0, + offset: 0, + is_file, + } + } + + async fn write_local_header(&self, mut buf: &mut W) -> io::Result + where + W: AsyncWrite + Unpin + ?Sized, + { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + LocalFileHeader { + signature: LOCAL_FH_SIG, + version_needed: 0x2d, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: 0, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64Field { + field_type: 0x0001, + field_size: 2 * 8, + uncompressed_size: 0, + compressed_size: 0, + }, + ) + .await?; + + Ok(size) + } + + async fn write_data_descriptor( + &self, + mut buf: &mut W, + ) -> io::Result { + let size = size_of::(); + + write_struct( + &mut buf, + LocalFileFooter { + signature: LOCAL_FF_SIG, + crc32: self.crc32, + compressed_size: self.compressed_size, + uncompressed_size: 
self.uncompressed_size, + }, + ) + .await?; + + Ok(size) + } + + async fn write_central_directory_header( + &self, + mut buf: &mut W, + ) -> io::Result { + let filename = self.filename.as_bytes(); + let filename_len = filename.len(); + let header_size = size_of::(); + let zip_field_size = size_of::(); + let size: usize = header_size + filename_len + zip_field_size; + + let (date, time) = epoch_to_dos(self.mtime); + + write_struct( + &mut buf, + CentralDirectoryFileHeader { + signature: CENTRAL_DIRECTORY_FH_SIG, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + flags: 1 << 3, + compression: 0, + time, + date, + crc32: self.crc32, + compressed_size: 0xFFFFFFFF, + uncompressed_size: 0xFFFFFFFF, + filename_len: filename_len as u16, + extra_field_len: zip_field_size as u16, + comment_len: 0, + start_disk: 0, + internal_flags: 0, + external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4, + offset: 0xFFFFFFFF, + }, + ) + .await?; + + buf.write_all(filename).await?; + + write_struct( + &mut buf, + Zip64FieldWithOffset { + field_type: 1, + field_size: 3 * 8, + uncompressed_size: self.uncompressed_size, + compressed_size: self.compressed_size, + offset: self.offset, + }, + ) + .await?; + + Ok(size) + } +} + +/// Wraps a writer that implements AsyncWrite for creating a ZIP archive +/// +/// This will create a ZIP archive on the fly with files added with +/// 'add_entry'. To Finish the file, call 'finish' +/// Example: +/// ```no_run +/// use proxmox_backup::tools::zip::*; +/// use tokio::fs::File; +/// +/// #[tokio::async] +/// async fn main() -> std::io::Result<()> { +/// let target = File::open("foo.zip").await?; +/// let mut source = File::open("foo.txt").await?; +/// +/// let mut zip = ZipEncoder::new(target); +/// zip.add_entry(ZipEntry { +/// "foo.txt", +/// 0, +/// 0o100755, +/// true, +/// }, source).await?; +/// +/// zip.finish().await? 
+/// +/// Ok(()) +/// } +/// ``` +pub struct ZipEncoder +where + W: AsyncWrite + Unpin, +{ + byte_count: usize, + files: Vec, + target: W, + buf: Vec, +} + +impl ZipEncoder { + pub fn new(target: W) -> Self { + Self { + byte_count: 0, + files: Vec::new(), + target, + buf: Vec::with_capacity(4096), + } + } + + pub async fn add_entry( + &mut self, + mut entry: ZipEntry, + content: Option, + ) -> Result<(), Error> { + entry.offset = self.byte_count.try_into()?; + self.byte_count += entry.write_local_header(&mut self.target).await?; + if let Some(mut content) = content { + let mut hasher = Hasher::new(); + let mut size = 0; + loop { + let count = content.read_buf(&mut self.buf).await?; + + // end of file + if count == 0 { + break; + } + + size += count; + hasher.update(&self.buf); + self.target.write_all(&self.buf).await?; + self.buf.clear(); + } + + self.byte_count += size; + entry.compressed_size = size.try_into()?; + entry.uncompressed_size = size.try_into()?; + entry.crc32 = hasher.finalize(); + } + self.byte_count += entry.write_data_descriptor(&mut self.target).await?; + + self.files.push(entry); + + Ok(()) + } + + async fn write_eocd( + &mut self, + central_dir_size: usize, + central_dir_offset: usize, + ) -> Result<(), Error> { + let entrycount = self.files.len(); + + let mut count = entrycount as u16; + let mut directory_size = central_dir_size as u32; + let mut directory_offset = central_dir_offset as u32; + + if central_dir_size > u32::MAX as usize + || central_dir_offset > u32::MAX as usize + || entrycount > u16::MAX as usize + { + count = 0xFFFF; + directory_size = 0xFFFFFFFF; + directory_offset = 0xFFFFFFFF; + + write_struct( + &mut self.target, + Zip64EOCDRecord { + signature: ZIP64_EOCD_RECORD, + field_size: 44, + version_made_by: VERSION_MADE_BY, + version_needed: VERSION_NEEDED, + disk_number: 0, + disk_number_central_dir: 0, + disk_record_count: entrycount.try_into()?, + total_record_count: entrycount.try_into()?, + directory_size: 
central_dir_size.try_into()?, + directory_offset: central_dir_offset.try_into()?, + }, + ) + .await?; + + let locator_offset = central_dir_offset + central_dir_size; + + write_struct( + &mut self.target, + Zip64EOCDLocator { + signature: ZIP64_EOCD_LOCATOR, + disk_number: 0, + offset: locator_offset.try_into()?, + disk_count: 1, + }, + ) + .await?; + } + + write_struct( + &mut self.target, + EndOfCentralDir { + signature: END_OF_CENTRAL_DIR, + disk_number: 0, + start_disk: 0, + disk_record_count: count, + total_record_count: count, + directory_size, + directory_offset, + comment_len: 0, + }, + ) + .await?; + + Ok(()) + } + + pub async fn finish(&mut self) -> Result<(), Error> { + let central_dir_offset = self.byte_count; + let mut central_dir_size = 0; + + for file in &self.files { + central_dir_size += file + .write_central_directory_header(&mut self.target) + .await?; + } + + self.write_eocd(central_dir_size, central_dir_offset) + .await?; + + self.target.flush().await?; + + Ok(()) + } +} -- 2.20.1