From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pbs-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id E5F7C1FF199 for <inbox@lore.proxmox.com>; Mon, 10 Mar 2025 12:17:32 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 509E2179F0; Mon, 10 Mar 2025 12:17:24 +0100 (CET) From: Christian Ebner <c.ebner@proxmox.com> To: pbs-devel@lists.proxmox.com Date: Mon, 10 Mar 2025 12:16:34 +0100 Message-Id: <20250310111634.162156-5-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250310111634.162156-1-c.ebner@proxmox.com> References: <20250310111634.162156-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.031 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH v2 proxmox-backup 4/4] fix #5331: garbage collection: avoid multiple chunk atime updates X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/> List-Post: <mailto:pbs-devel@lists.proxmox.com> List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox Backup Server development discussion 
<pbs-devel@lists.proxmox.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com> Reduce the number of atime updates on the same chunk by logically iterating over image index files, following the incremental backup logic. By inserting paths for encountered images during `list_images` into a `HashSet`, the iteration happens now for the same image filenames in the same image namespace and group in an order based on the snapshot timestamp. For each image, keep track of the encountered chunk digests, and remember these as seen for the next snapshot. Chunks which have been encountered in the previous image index, but are not present anymore are removed from the list after each image, in order to reduce memory footprint. Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5331 Signed-off-by: Christian Ebner <c.ebner@proxmox.com> --- changes since version 1: - Use pre-existing datastore iterator helpers, following the logic other datastore operations take. - Chunks are now remembered for all archives per snapshot, not just a single archive per snapshot as previously, this mimics more closely the backup behaviour. 
pbs-datastore/src/datastore.rs | 117 +++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 33 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index fdbb33a98..a80343d9b 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -25,7 +25,7 @@ use pbs_api_types::{ MaintenanceMode, MaintenanceType, Operation, UPID, }; -use crate::backup_info::{BackupDir, BackupGroup}; +use crate::backup_info::{BackupDir, BackupGroup, BackupInfo}; use crate::chunk_store::ChunkStore; use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; use crate::fixed_index::{FixedIndexReader, FixedIndexWriter}; @@ -970,10 +970,10 @@ impl DataStore { ListGroups::new(Arc::clone(self), ns)?.collect() } - fn list_images(&self) -> Result<Vec<PathBuf>, Error> { + fn list_images(&self) -> Result<HashSet<PathBuf>, Error> { let base = self.base_path(); - let mut list = vec![]; + let mut list = HashSet::new(); use walkdir::WalkDir; @@ -1021,7 +1021,7 @@ impl DataStore { if archive_type == ArchiveType::FixedIndex || archive_type == ArchiveType::DynamicIndex { - list.push(path); + list.insert(path); } } } @@ -1071,6 +1071,7 @@ impl DataStore { &self, index: Box<dyn IndexFile>, file_name: &Path, // only used for error reporting + touched_chunks: &mut TouchedChunks, status: &mut GarbageCollectionStatus, worker: &dyn WorkerTaskContext, ) -> Result<(), Error> { @@ -1081,6 +1082,12 @@ impl DataStore { worker.check_abort()?; worker.fail_on_shutdown()?; let digest = index.index_digest(pos).unwrap(); + + // Avoid multiple expensive atime updates by utimensat + if touched_chunks.insert(*digest) { + continue; + } + if !self.inner.chunk_store.cond_touch_chunk(digest, false)? 
{ let hex = hex::encode(digest); warn!( @@ -1107,43 +1114,87 @@ impl DataStore { status: &mut GarbageCollectionStatus, worker: &dyn WorkerTaskContext, ) -> Result<(), Error> { - let image_list = self.list_images()?; - let image_count = image_list.len(); - + // Iterate twice over the datastore to fetch images, even if this comes with an additional + // runtime cost: + // - First iteration to find all index files, no matter if they are in a location expected + // by the datastore's hierarchy + // - Iterate using the datastore's helpers, so the namespaces, groups and snapshots are + // looked up given the expected hierarchy and iterator logic + // + // By this it is assured that all index files are used, even if they would not have been + // seen by the regular logic and the user is informed by the garbage collection run about + // the detected index files not following the iterators logic. + + let mut unprocessed_image_list = self.list_images()?; + let image_count = unprocessed_image_list.len(); + + // Optimize for avoiding updates of chunks atime in same group for consecutive + // snapshots multiple times. + let mut touched_chunks = TouchedChunks::new(); + let mut processed_images = 0; let mut last_percentage: usize = 0; - let mut strange_paths_count: u64 = 0; - - for (i, img) in image_list.into_iter().enumerate() { - worker.check_abort()?; - worker.fail_on_shutdown()?; - - if let Some(backup_dir_path) = img.parent() { - let backup_dir_path = backup_dir_path.strip_prefix(self.base_path())?; - if let Some(backup_dir_str) = backup_dir_path.to_str() { - if pbs_api_types::parse_ns_and_snapshot(backup_dir_str).is_err() { - strange_paths_count += 1; + let arc_self = Arc::new(self.clone()); + for namespace in arc_self + .recursive_iter_backup_ns(BackupNamespace::root()) + .context("creating namespace iterator failed")? + { + let namespace = namespace.context("iterating namespaces failed")?; + for group in arc_self.iter_backup_groups(namespace)? 
{ + let group = group.context("iterating backup groups failed")?; + let mut snapshots = group.list_backups().context("listing snapshots failed")?; + // Sort by snapshot timestamp to iterate over consecutive snapshots for each image. + BackupInfo::sort_list(&mut snapshots, true); + for snapshot in snapshots { + for file in snapshot.files { + worker.check_abort()?; + worker.fail_on_shutdown()?; + + let mut path = snapshot.backup_dir.full_path(); + path.push(file); + + let index = match self.open_index_reader(&path)? { + Some(index) => index, + None => continue, + }; + self.index_mark_used_chunks( + index, + &path, + &mut touched_chunks, + status, + worker, + )?; + + unprocessed_image_list.remove(&path); + + let percentage = (processed_images + 1) * 100 / image_count; + if percentage > last_percentage { + info!( + "marked {percentage}% ({} of {image_count} index files)", + processed_images + 1, + ); + last_percentage = percentage; + } + processed_images += 1; } + touched_chunks.reset(); } } - - if let Some(index) = self.open_index_reader(&img)? { - self.index_mark_used_chunks(index, &img, status, worker)?; - } - - let percentage = (i + 1) * 100 / image_count; - if percentage > last_percentage { - info!( - "marked {percentage}% ({} of {image_count} index files)", - i + 1, - ); - last_percentage = percentage; - } } + let strange_paths_count = unprocessed_image_list.len(); if strange_paths_count > 0 { - info!( - "found (and marked) {strange_paths_count} index files outside of expected directory scheme" + warn!("found {strange_paths_count} index files outside of expected directory scheme"); + } + for path in unprocessed_image_list { + let index = match self.open_index_reader(&path)? 
{ + Some(index) => index, + None => continue, + }; + self.index_mark_used_chunks(index, &path, &mut touched_chunks, status, worker)?; + warn!( + "Marked chunks for unexpected index file at '{}'", + path.to_string_lossy() ); } -- 2.39.5 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel