From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 604691FF143 for ; Mon, 19 Jan 2026 14:27:16 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 916CB1EC76; Mon, 19 Jan 2026 14:27:25 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Mon, 19 Jan 2026 14:27:07 +0100 Message-ID: <20260119132707.686523-5-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260119132707.686523-1-c.ebner@proxmox.com> References: <20260119132707.686523-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1768829187692 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.048 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup 4/4] fix #5799: GC: track chunk digests and accumulate statistics X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Keep track of all digests referenced by snapshots index files encountered during phase 1 of garbage collection in the reverse lookup table and fill in raw chunk size information during phase 2. 
This finally allows gathering the unique count and raw size information printed at the end of garbage collection. Signed-off-by: Christian Ebner --- pbs-datastore/src/chunk_store.rs | 9 +++++++ pbs-datastore/src/datastore.rs | 46 ++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index e7e94b29f..3bf21e1eb 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -434,6 +434,7 @@ impl ChunkStore { status: &mut GarbageCollectionStatus, worker: &dyn WorkerTaskContext, cache: Option<&LocalDatastoreLruCache>, + mut digest_map: Option<&mut crate::reverse_digest_map::ReverseDigestMap>, ) -> Result<(), Error> { // unwrap: only `None` in unit tests assert!(self.locker.is_some()); @@ -524,6 +525,14 @@ impl ChunkStore { }, )?; } + + // Chunk info not inserted if not already present in mapping + if chunk_ext == ChunkExt::None { + if let Some(ref mut map) = digest_map { + let digest = <[u8; 32]>::from_hex(filename.to_bytes())?; + map.set_raw_chunk_size(&digest, stat.st_size as u64); + } + } } drop(lock); } diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 7ad3d917d..7efa15335 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -45,6 +45,7 @@ use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; use crate::fixed_index::{FixedIndexReader, FixedIndexWriter}; use crate::hierarchy::{ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive}; use crate::index::IndexFile; +use crate::reverse_digest_map::{DigestStatAccumulator, ReverseDigestMap}; use crate::s3::S3_CONTENT_PREFIX; use crate::task_tracking::{self, update_active_operations}; use crate::{DataBlob, LocalDatastoreLruCache}; @@ -1433,6 +1434,7 @@ impl DataStore { index: Box, file_name: &Path, // only used for error reporting chunk_lru_cache: &mut Option>, + mut digest_map: Option, status: &mut 
GarbageCollectionStatus, worker: &dyn WorkerTaskContext, s3_client: Option>, @@ -1443,7 +1445,13 @@ impl DataStore { for pos in 0..index.index_count() { worker.check_abort()?; worker.fail_on_shutdown()?; - let digest = index.index_digest(pos).unwrap(); + let chunk_info = index.chunk_info(pos).unwrap(); + let digest = &chunk_info.digest; + + if let Some(map) = digest_map.as_mut() { + map.digests + .insert(digest, map.namespace, map.snapshot, chunk_info.size()); + } // Avoid multiple expensive atime updates by utimensat if let Some(chunk_lru_cache) = chunk_lru_cache { @@ -1493,6 +1501,7 @@ impl DataStore { worker: &dyn WorkerTaskContext, cache_capacity: usize, s3_client: Option>, + mut digest_map: Option<&mut ReverseDigestMap>, ) -> Result<(), Error> { // Iterate twice over the datastore to fetch index files, even if this comes with an // additional runtime cost: @@ -1522,7 +1531,7 @@ impl DataStore { .context("creating namespace iterator failed")? { let namespace = namespace.context("iterating namespaces failed")?; - for group in arc_self.iter_backup_groups(namespace)? { + for group in arc_self.iter_backup_groups(namespace.clone())? 
{ let group = group.context("iterating backup groups failed")?; // Avoid race between listing/marking of snapshots by GC and pruning the last @@ -1580,10 +1589,21 @@ impl DataStore { } }; + let digest_map = if let Some(digests) = digest_map.as_mut() { + Some(ReverseMap { + digests, + namespace: &namespace, + snapshot: snapshot.backup_dir.dir(), + }) + } else { + None + }; + self.index_mark_used_chunks( index, &path, &mut chunk_lru_cache, + digest_map, status, worker, s3_client.as_ref().cloned(), @@ -1625,6 +1645,7 @@ impl DataStore { index, &path, &mut chunk_lru_cache, + None, status, worker, s3_client.as_ref().cloned(), @@ -1766,11 +1787,14 @@ impl DataStore { info!("Start GC phase1 (mark used chunks)"); + let mut digest_map = Some(ReverseDigestMap::default()); + self.mark_used_chunks( &mut gc_status, worker, gc_cache_capacity, s3_client.as_ref().cloned(), + digest_map.as_mut(), ) .context("marking used chunks failed")?; @@ -1796,6 +1820,10 @@ impl DataStore { None => continue, }; + if let Some(map) = digest_map.as_mut() { + map.set_raw_chunk_size(&digest, content.size); + } + let timeout = std::time::Duration::from_secs(0); let _chunk_guard = match self.inner.chunk_store.lock_chunk(&digest, timeout) { Ok(guard) => guard, @@ -1892,6 +1920,7 @@ impl DataStore { &mut tmp_gc_status, worker, self.cache(), + None, )?; } else { self.inner.chunk_store.sweep_unused_chunks( @@ -1900,6 +1929,7 @@ impl DataStore { &mut gc_status, worker, None, + digest_map.as_mut(), )?; } @@ -1913,6 +1943,12 @@ impl DataStore { ); } } + + if let Some(digest_map) = digest_map.take() { + let accumulator = DigestStatAccumulator::default(); + accumulator.accumulate_and_list(digest_map); + } + info!( "Removed garbage: {}", HumanByte::from(gc_status.removed_bytes), @@ -2877,3 +2913,9 @@ impl S3DeleteList { Ok(()) } } + +struct ReverseMap<'a> { + digests: &'a mut ReverseDigestMap, + namespace: &'a pbs_api_types::BackupNamespace, + snapshot: &'a pbs_api_types::BackupDir, +} -- 2.47.3 
_______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel