From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id D5A0B1FF389 for ; Wed, 5 Jun 2024 12:54:32 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C28A5318FC; Wed, 5 Jun 2024 12:54:59 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Wed, 5 Jun 2024 12:53:47 +0200 Message-Id: <20240605105416.278748-30-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240605105416.278748-1-c.ebner@proxmox.com> References: <20240605105416.278748-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.027 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [PATCH v9 proxmox-backup 29/58] pxar: caching: add look-ahead cache X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Add a lookahead cache and the necessary types to store the required data and keep track of directory boundaries while traversing the filesystem tree, in order to postpone the decision whether to reuse or re-encode a given regular file with unchanged metadata. 
Signed-off-by: Christian Ebner --- changes since version 8: - inline clear_range - return start path on `take_and_reset`, removing `start_path` pbs-client/src/pxar/create.rs | 2 +- pbs-client/src/pxar/look_ahead_cache.rs | 162 ++++++++++++++++++++++++ pbs-client/src/pxar/mod.rs | 1 + 3 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 pbs-client/src/pxar/look_ahead_cache.rs diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs index 1cf11fc08..1961b9b54 100644 --- a/pbs-client/src/pxar/create.rs +++ b/pbs-client/src/pxar/create.rs @@ -131,7 +131,7 @@ impl fmt::Display for ArchiveError { } #[derive(Eq, PartialEq, Hash)] -struct HardLinkInfo { +pub(crate) struct HardLinkInfo { st_dev: u64, st_ino: u64, } diff --git a/pbs-client/src/pxar/look_ahead_cache.rs b/pbs-client/src/pxar/look_ahead_cache.rs new file mode 100644 index 000000000..37c07a9bc --- /dev/null +++ b/pbs-client/src/pxar/look_ahead_cache.rs @@ -0,0 +1,162 @@ +use std::collections::HashSet; +use std::ffi::CString; +use std::ops::Range; +use std::os::unix::io::OwnedFd; +use std::path::PathBuf; + +use nix::sys::stat::FileStat; + +use pxar::encoder::PayloadOffset; +use pxar::Metadata; + +use super::create::*; + +const DEFAULT_CACHE_SIZE: usize = 512; + +pub(crate) struct CacheEntryData { + pub(crate) fd: OwnedFd, + pub(crate) c_file_name: CString, + pub(crate) stat: FileStat, + pub(crate) metadata: Metadata, + pub(crate) payload_offset: PayloadOffset, +} + +pub(crate) enum CacheEntry { + RegEntry(CacheEntryData), + DirEntry(CacheEntryData), + DirEnd, +} + +pub(crate) struct PxarLookaheadCache { + // Current state of the cache + enabled: bool, + // Cached entries + entries: Vec, + // Entries encountered having more than one link given by stat + hardlinks: HashSet, + // Payload range covered by the currently cached entries + range: Range, + // Possible held back last chunk from last flush, used for possible chunk continuation + last_chunk: Option, + // Path when started 
caching + start_path: PathBuf, + // Number of entries with file descriptors + fd_entries: usize, + // Max number of entries with file descriptors + cache_size: usize, +} + +impl PxarLookaheadCache { + pub(crate) fn new(size: Option) -> Self { + Self { + enabled: false, + entries: Vec::new(), + hardlinks: HashSet::new(), + range: 0..0, + last_chunk: None, + start_path: PathBuf::new(), + fd_entries: 0, + cache_size: size.unwrap_or(DEFAULT_CACHE_SIZE), + } + } + + pub(crate) fn is_full(&self) -> bool { + self.fd_entries >= self.cache_size + } + + pub(crate) fn caching_enabled(&self) -> bool { + self.enabled + } + + pub(crate) fn insert( + &mut self, + fd: OwnedFd, + c_file_name: CString, + stat: FileStat, + metadata: Metadata, + payload_offset: PayloadOffset, + path: PathBuf, + ) { + if !self.enabled { + self.start_path = path; + if !metadata.is_dir() { + self.start_path.pop(); + } + } + self.enabled = true; + self.fd_entries += 1; + if metadata.is_dir() { + self.entries.push(CacheEntry::DirEntry(CacheEntryData { + fd, + c_file_name, + stat, + metadata, + payload_offset, + })) + } else { + self.entries.push(CacheEntry::RegEntry(CacheEntryData { + fd, + c_file_name, + stat, + metadata, + payload_offset, + })) + } + } + + pub(crate) fn insert_dir_end(&mut self) { + self.entries.push(CacheEntry::DirEnd); + } + + pub(crate) fn take_and_reset(&mut self) -> (Vec, PathBuf) { + self.fd_entries = 0; + self.enabled = false; + // keep end for possible continuation if cache has been cleared because + // it was full, but further caching would be fine + self.range = self.range.end..self.range.end; + ( + std::mem::take(&mut self.entries), + std::mem::take(&mut self.start_path), + ) + } + + pub(crate) fn contains_hardlink(&self, info: &HardLinkInfo) -> bool { + self.hardlinks.contains(info) + } + + pub(crate) fn insert_hardlink(&mut self, info: HardLinkInfo) -> bool { + self.hardlinks.insert(info) + } + + pub(crate) fn range(&self) -> &Range { + &self.range + } + + pub(crate) fn 
update_range(&mut self, range: Range) { + self.range = range; + } + + pub(crate) fn try_extend_range(&mut self, range: Range) -> bool { + if self.range.end == 0 { + // initialize first range to start and end with start of new range + self.range.start = range.start; + self.range.end = range.start; + } + + // range continued, update end + if self.range.end == range.start { + self.range.end = range.end; + return true; + } + + false + } + + pub(crate) fn take_last_chunk(&mut self) -> Option { + self.last_chunk.take() + } + + pub(crate) fn update_last_chunk(&mut self, chunk: Option) { + self.last_chunk = chunk; + } +} diff --git a/pbs-client/src/pxar/mod.rs b/pbs-client/src/pxar/mod.rs index 5248a1956..334759df6 100644 --- a/pbs-client/src/pxar/mod.rs +++ b/pbs-client/src/pxar/mod.rs @@ -50,6 +50,7 @@ pub(crate) mod create; pub(crate) mod dir_stack; pub(crate) mod extract; +pub(crate) mod look_ahead_cache; pub(crate) mod metadata; pub(crate) mod tools; -- 2.39.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel