all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 4/4] fix #5799: GC: track chunk digests and accumulate statistics
Date: Mon, 19 Jan 2026 14:27:07 +0100	[thread overview]
Message-ID: <20260119132707.686523-5-c.ebner@proxmox.com> (raw)
In-Reply-To: <20260119132707.686523-1-c.ebner@proxmox.com>

Keep track of all digests referenced by snapshot index files
encountered during phase 1 of garbage collection in the reverse
lookup table and fill in raw chunk size information during phase 2.

This finally allows gathering the unique chunk count and raw size
information printed at the end of garbage collection.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-datastore/src/chunk_store.rs |  9 +++++++
 pbs-datastore/src/datastore.rs   | 46 ++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index e7e94b29f..3bf21e1eb 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -434,6 +434,7 @@ impl ChunkStore {
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
         cache: Option<&LocalDatastoreLruCache>,
+        mut digest_map: Option<&mut crate::reverse_digest_map::ReverseDigestMap>,
     ) -> Result<(), Error> {
         // unwrap: only `None` in unit tests
         assert!(self.locker.is_some());
@@ -524,6 +525,14 @@ impl ChunkStore {
                         },
                     )?;
                 }
+
+                // Chunk info not inserted if not already present in mapping
+                if chunk_ext == ChunkExt::None {
+                    if let Some(ref mut map) = digest_map {
+                        let digest = <[u8; 32]>::from_hex(filename.to_bytes())?;
+                        map.set_raw_chunk_size(&digest, stat.st_size as u64);
+                    }
+                }
             }
             drop(lock);
         }
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 7ad3d917d..7efa15335 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -45,6 +45,7 @@ use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
 use crate::hierarchy::{ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive};
 use crate::index::IndexFile;
+use crate::reverse_digest_map::{DigestStatAccumulator, ReverseDigestMap};
 use crate::s3::S3_CONTENT_PREFIX;
 use crate::task_tracking::{self, update_active_operations};
 use crate::{DataBlob, LocalDatastoreLruCache};
@@ -1433,6 +1434,7 @@ impl DataStore {
         index: Box<dyn IndexFile>,
         file_name: &Path, // only used for error reporting
         chunk_lru_cache: &mut Option<LruCache<[u8; 32], ()>>,
+        mut digest_map: Option<ReverseMap>,
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
         s3_client: Option<Arc<S3Client>>,
@@ -1443,7 +1445,13 @@ impl DataStore {
         for pos in 0..index.index_count() {
             worker.check_abort()?;
             worker.fail_on_shutdown()?;
-            let digest = index.index_digest(pos).unwrap();
+            let chunk_info = index.chunk_info(pos).unwrap();
+            let digest = &chunk_info.digest;
+
+            if let Some(map) = digest_map.as_mut() {
+                map.digests
+                    .insert(digest, map.namespace, map.snapshot, chunk_info.size());
+            }
 
             // Avoid multiple expensive atime updates by utimensat
             if let Some(chunk_lru_cache) = chunk_lru_cache {
@@ -1493,6 +1501,7 @@ impl DataStore {
         worker: &dyn WorkerTaskContext,
         cache_capacity: usize,
         s3_client: Option<Arc<S3Client>>,
+        mut digest_map: Option<&mut ReverseDigestMap>,
     ) -> Result<(), Error> {
         // Iterate twice over the datastore to fetch index files, even if this comes with an
         // additional runtime cost:
@@ -1522,7 +1531,7 @@ impl DataStore {
             .context("creating namespace iterator failed")?
         {
             let namespace = namespace.context("iterating namespaces failed")?;
-            for group in arc_self.iter_backup_groups(namespace)? {
+            for group in arc_self.iter_backup_groups(namespace.clone())? {
                 let group = group.context("iterating backup groups failed")?;
 
                 // Avoid race between listing/marking of snapshots by GC and pruning the last
@@ -1580,10 +1589,21 @@ impl DataStore {
                                 }
                             };
 
+                            let digest_map = if let Some(digests) = digest_map.as_mut() {
+                                Some(ReverseMap {
+                                    digests,
+                                    namespace: &namespace,
+                                    snapshot: snapshot.backup_dir.dir(),
+                                })
+                            } else {
+                                None
+                            };
+
                             self.index_mark_used_chunks(
                                 index,
                                 &path,
                                 &mut chunk_lru_cache,
+                                digest_map,
                                 status,
                                 worker,
                                 s3_client.as_ref().cloned(),
@@ -1625,6 +1645,7 @@ impl DataStore {
                 index,
                 &path,
                 &mut chunk_lru_cache,
+                None,
                 status,
                 worker,
                 s3_client.as_ref().cloned(),
@@ -1766,11 +1787,14 @@ impl DataStore {
 
         info!("Start GC phase1 (mark used chunks)");
 
+        let mut digest_map = Some(ReverseDigestMap::default());
+
         self.mark_used_chunks(
             &mut gc_status,
             worker,
             gc_cache_capacity,
             s3_client.as_ref().cloned(),
+            digest_map.as_mut(),
         )
         .context("marking used chunks failed")?;
 
@@ -1796,6 +1820,10 @@ impl DataStore {
                             None => continue,
                         };
 
+                    if let Some(map) = digest_map.as_mut() {
+                        map.set_raw_chunk_size(&digest, content.size);
+                    }
+
                     let timeout = std::time::Duration::from_secs(0);
                     let _chunk_guard = match self.inner.chunk_store.lock_chunk(&digest, timeout) {
                         Ok(guard) => guard,
@@ -1892,6 +1920,7 @@ impl DataStore {
                 &mut tmp_gc_status,
                 worker,
                 self.cache(),
+                None,
             )?;
         } else {
             self.inner.chunk_store.sweep_unused_chunks(
@@ -1900,6 +1929,7 @@ impl DataStore {
                 &mut gc_status,
                 worker,
                 None,
+                digest_map.as_mut(),
             )?;
         }
 
@@ -1913,6 +1943,12 @@ impl DataStore {
                 );
             }
         }
+
+        if let Some(digest_map) = digest_map.take() {
+            let accumulator = DigestStatAccumulator::default();
+            accumulator.accumulate_and_list(digest_map);
+        }
+
         info!(
             "Removed garbage: {}",
             HumanByte::from(gc_status.removed_bytes),
@@ -2877,3 +2913,9 @@ impl S3DeleteList {
         Ok(())
     }
 }
+
+struct ReverseMap<'a> {
+    digests: &'a mut ReverseDigestMap,
+    namespace: &'a pbs_api_types::BackupNamespace,
+    snapshot: &'a pbs_api_types::BackupDir,
+}
-- 
2.47.3



_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel


      parent reply	other threads:[~2026-01-19 13:27 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-19 13:27 [pbs-devel] [RFC proxmox-backup 0/4] fix #5799: Gather per-namespace/group/snapshot storage usage stats Christian Ebner
2026-01-19 13:27 ` [pbs-devel] [PATCH proxmox-backup 1/4] chunk store: restrict chunk sweep helper method to module parent Christian Ebner
2026-01-19 13:27 ` [pbs-devel] [PATCH proxmox-backup 2/4] datastore: add namespace/group/snapshot indices for reverse lookups Christian Ebner
2026-01-19 13:27 ` [pbs-devel] [PATCH proxmox-backup 3/4] datastore: introduce reverse chunk digest lookup table Christian Ebner
2026-01-19 13:27 ` Christian Ebner [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260119132707.686523-5-c.ebner@proxmox.com \
    --to=c.ebner@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal