From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id EEF9795F3D for ; Wed, 28 Feb 2024 15:03:52 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4AD0FD096 for ; Wed, 28 Feb 2024 15:03:00 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 28 Feb 2024 15:02:56 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 925FC47C67 for ; Wed, 28 Feb 2024 15:02:54 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Wed, 28 Feb 2024 15:02:24 +0100 Message-Id: <20240228140226.1251979-35-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240228140226.1251979-1-c.ebner@proxmox.com> References: <20240228140226.1251979-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.046 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [RFC proxmox-backup 34/36] fix #3174: client: pxar: enable caching and meta comparison X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 
2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 28 Feb 2024 14:03:53 -0000 Add the final glue logic to enable the look-ahead caching and metadata comparison introduced in the preparatory patches. Signed-off-by: Christian Ebner --- pbs-client/src/pxar/create.rs | 107 ++++++++++++++++++++++++++++++++-- 1 file changed, 102 insertions(+), 5 deletions(-) diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs index fbcccb2e..ad76890c 100644 --- a/pbs-client/src/pxar/create.rs +++ b/pbs-client/src/pxar/create.rs @@ -32,10 +32,14 @@ use pbs_datastore::dynamic_index::{ }; use crate::inject_reused_chunks::InjectChunks; +use crate::pxar::lookahead_cache::{CacheEntry, CacheEntryData}; use crate::pxar::metadata::errno_is_unsupported; use crate::pxar::tools::assert_single_path_component; use crate::pxar::Flags; +const MAX_CACHE_SIZE: usize = 1024; +const CACHED_PAYLOAD_THRESHOLD: u64 = 2 * 1024 * 1024; + #[derive(Default)] struct ReusedChunks { start_boundary: PayloadOffset, @@ -253,6 +257,9 @@ struct Archiver { reused_chunks: ReusedChunks, previous_payload_index: Option, forced_boundaries: Arc>>, + cached_entries: Vec, + caching_enabled: bool, + cached_payload_size: u64, } type Encoder<'a, T> = pxar::encoder::aio::Encoder<'a, T>; @@ -335,11 +342,26 @@ where reused_chunks: ReusedChunks::new(), previous_payload_index, forced_boundaries, + cached_entries: Vec::new(), + caching_enabled: false, + cached_payload_size: 0, }; archiver .archive_dir_contents(&mut encoder, accessor, source_dir, true) .await?; + + if let Some(last) = archiver.cached_entries.pop() { + match last { + CacheEntry::DirEnd => {} + _ => archiver.cached_entries.push(last), + } + } + + archiver + .flush_cached_to_archive(&mut encoder, false) + .await?; + encoder.finish().await?; Ok(()) } @@ -413,6 +435,11 @@ impl Archiver { .await .map_err(|err| self.wrap_err(err))?; 
} + + if self.caching_enabled { + self.cached_entries.push(CacheEntry::DirEnd); + } + self.path = old_path; self.entry_counter = entry_counter; self.patterns.truncate(old_patterns_count); @@ -693,8 +720,6 @@ impl Archiver { c_file_name: &CStr, stat: &FileStat, ) -> Result<(), Error> { - use pxar::format::mode; - let file_mode = stat.st_mode & libc::S_IFMT; let open_mode = if file_mode == libc::S_IFREG || file_mode == libc::S_IFDIR { OFlag::empty() @@ -732,6 +757,71 @@ impl Archiver { self.skip_e2big_xattr, )?; + if self.previous_payload_index.is_none() { + return self + .add_entry_to_archive(encoder, accessor, c_file_name, stat, fd, &metadata) + .await; + } + + // Avoid having too many open file handles in cached entries + if self.cached_entries.len() > MAX_CACHE_SIZE { + self.flush_cached_to_archive(encoder, false).await?; + } + + if metadata.is_regular_file() { + self.cache_or_flush_entries(encoder, accessor, c_file_name, stat, fd, &metadata) + .await + } else { + if self.caching_enabled { + if stat.st_mode & libc::S_IFMT == libc::S_IFDIR { + let fd_clone = fd.try_clone()?; + let cache_entry = CacheEntry::DirEntry(CacheEntryData::new( + fd, + c_file_name.into(), + stat.clone(), + metadata.clone(), + PayloadOffset::default(), + )); + self.cached_entries.push(cache_entry); + + let dir = Dir::from_fd(fd_clone.into_raw_fd())?; + self.add_directory(encoder, accessor, dir, c_file_name, &metadata, stat) + .await?; + + if let Some(ref catalog) = self.catalog { + if !self.caching_enabled { + catalog.lock().unwrap().end_directory()?; + } + } + } else { + let cache_entry = CacheEntry::RegEntry(CacheEntryData::new( + fd, + c_file_name.into(), + stat.clone(), + metadata, + PayloadOffset::default(), + )); + self.cached_entries.push(cache_entry); + } + Ok(()) + } else { + self.add_entry_to_archive(encoder, accessor, c_file_name, stat, fd, &metadata) + .await + } + } + } + + async fn add_entry_to_archive( + &mut self, + encoder: &mut Encoder<'_, T>, + accessor: &mut Option>>, 
c_file_name: &CStr, + stat: &FileStat, + fd: OwnedFd, + metadata: &Metadata, + ) -> Result<(), Error> { + use pxar::format::mode; + let file_name: &Path = OsStr::from_bytes(c_file_name.to_bytes()).as_ref(); match metadata.file_type() { mode::IFREG => { @@ -781,7 +871,9 @@ impl Archiver { .add_directory(encoder, accessor, dir, c_file_name, &metadata, stat) .await; if let Some(ref catalog) = self.catalog { - catalog.lock().unwrap().end_directory()?; + if !self.caching_enabled { + catalog.lock().unwrap().end_directory()?; + } } result } @@ -1123,7 +1215,9 @@ impl Archiver { ) -> Result<(), Error> { let dir_name = OsStr::from_bytes(dir_name.to_bytes()); - encoder.create_directory(dir_name, metadata).await?; + if !self.caching_enabled { + encoder.create_directory(dir_name, metadata).await?; + } let old_fs_magic = self.fs_magic; let old_fs_feature_flags = self.fs_feature_flags; @@ -1163,7 +1257,10 @@ impl Archiver { self.fs_feature_flags = old_fs_feature_flags; self.current_st_dev = old_st_dev; - encoder.finish().await?; + if !self.caching_enabled { + encoder.finish().await?; + } + result } -- 2.39.2