From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 4/4] fix #5799: GC: track chunk digests and accumulate statistics
Date: Mon, 19 Jan 2026 14:27:07 +0100
Message-ID: <20260119132707.686523-5-c.ebner@proxmox.com>
In-Reply-To: <20260119132707.686523-1-c.ebner@proxmox.com>

Keep track of all digests referenced by snapshot index files encountered
during phase 1 of garbage collection in the reverse lookup table, and fill
in the raw chunk size information during phase 2. This finally allows
gathering the unique chunk count and raw size information printed at the
end of garbage collection.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
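A minimal sketch of the intended bookkeeping, for reviewers (the actual
ReverseDigestMap and DigestStatAccumulator are introduced in patch 3/4 of
this series; the type and field names below are simplified assumptions,
not the real API):

use std::collections::HashMap;

#[derive(Default)]
struct DigestEntry {
    // (namespace, snapshot) pairs referencing this chunk, recorded in phase 1
    referencing_snapshots: Vec<(String, String)>,
    // raw on-disk chunk size, filled in during phase 2
    raw_size: Option<u64>,
}

#[derive(Default)]
struct ReverseDigestMapSketch {
    digests: HashMap<[u8; 32], DigestEntry>,
}

impl ReverseDigestMapSketch {
    // Phase 1: record that `snapshot` in `namespace` references `digest`.
    fn insert(&mut self, digest: &[u8; 32], namespace: &str, snapshot: &str) {
        self.digests
            .entry(*digest)
            .or_default()
            .referencing_snapshots
            .push((namespace.to_string(), snapshot.to_string()));
    }

    // Phase 2: fill in the raw size for digests already seen in phase 1;
    // unknown digests are intentionally not inserted.
    fn set_raw_chunk_size(&mut self, digest: &[u8; 32], raw_size: u64) {
        if let Some(entry) = self.digests.get_mut(digest) {
            entry.raw_size = Some(raw_size);
        }
    }

    // End of GC: unique chunk count and accumulated raw size, as printed in
    // the task log.
    fn accumulate(&self) -> (usize, u64) {
        let raw_size: u64 = self.digests.values().filter_map(|e| e.raw_size).sum();
        (self.digests.len(), raw_size)
    }
}

Phase 1 fills the map while marking used chunks from the index files, phase 2
adds the on-disk sizes while sweeping the chunk store (or listing the S3
content), and the accumulator pass at the end corresponds to the
accumulate_and_list() call added below.
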
pbs-datastore/src/chunk_store.rs |  9 +++++++
pbs-datastore/src/datastore.rs   | 46 ++++++++++++++++++++++++++++++--
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index e7e94b29f..3bf21e1eb 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -434,6 +434,7 @@ impl ChunkStore {
status: &mut GarbageCollectionStatus,
worker: &dyn WorkerTaskContext,
cache: Option<&LocalDatastoreLruCache>,
+ mut digest_map: Option<&mut crate::reverse_digest_map::ReverseDigestMap>,
) -> Result<(), Error> {
// unwrap: only `None` in unit tests
assert!(self.locker.is_some());
@@ -524,6 +525,14 @@ impl ChunkStore {
},
)?;
}
+
+ // Chunk info is not inserted if the digest is not already present in the mapping
+ if chunk_ext == ChunkExt::None {
+ if let Some(ref mut map) = digest_map {
+ let digest = <[u8; 32]>::from_hex(filename.to_bytes())?;
+ map.set_raw_chunk_size(&digest, stat.st_size as u64);
+ }
+ }
}
drop(lock);
}
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 7ad3d917d..7efa15335 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -45,6 +45,7 @@ use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
use crate::hierarchy::{ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive};
use crate::index::IndexFile;
+use crate::reverse_digest_map::{DigestStatAccumulator, ReverseDigestMap};
use crate::s3::S3_CONTENT_PREFIX;
use crate::task_tracking::{self, update_active_operations};
use crate::{DataBlob, LocalDatastoreLruCache};
@@ -1433,6 +1434,7 @@ impl DataStore {
index: Box<dyn IndexFile>,
file_name: &Path, // only used for error reporting
chunk_lru_cache: &mut Option<LruCache<[u8; 32], ()>>,
+ mut digest_map: Option<ReverseMap>,
status: &mut GarbageCollectionStatus,
worker: &dyn WorkerTaskContext,
s3_client: Option<Arc<S3Client>>,
@@ -1443,7 +1445,13 @@ impl DataStore {
for pos in 0..index.index_count() {
worker.check_abort()?;
worker.fail_on_shutdown()?;
- let digest = index.index_digest(pos).unwrap();
+ let chunk_info = index.chunk_info(pos).unwrap();
+ let digest = &chunk_info.digest;
+
+ if let Some(map) = digest_map.as_mut() {
+ map.digests
+ .insert(digest, map.namespace, map.snapshot, chunk_info.size());
+ }
// Avoid multiple expensive atime updates by utimensat
if let Some(chunk_lru_cache) = chunk_lru_cache {
@@ -1493,6 +1501,7 @@ impl DataStore {
worker: &dyn WorkerTaskContext,
cache_capacity: usize,
s3_client: Option<Arc<S3Client>>,
+ mut digest_map: Option<&mut ReverseDigestMap>,
) -> Result<(), Error> {
// Iterate twice over the datastore to fetch index files, even if this comes with an
// additional runtime cost:
@@ -1522,7 +1531,7 @@ impl DataStore {
.context("creating namespace iterator failed")?
{
let namespace = namespace.context("iterating namespaces failed")?;
- for group in arc_self.iter_backup_groups(namespace)? {
+ for group in arc_self.iter_backup_groups(namespace.clone())? {
let group = group.context("iterating backup groups failed")?;
// Avoid race between listing/marking of snapshots by GC and pruning the last
@@ -1580,10 +1589,21 @@ impl DataStore {
}
};
+ let digest_map = if let Some(digests) = digest_map.as_mut() {
+ Some(ReverseMap {
+ digests,
+ namespace: &namespace,
+ snapshot: snapshot.backup_dir.dir(),
+ })
+ } else {
+ None
+ };
+
self.index_mark_used_chunks(
index,
&path,
&mut chunk_lru_cache,
+ digest_map,
status,
worker,
s3_client.as_ref().cloned(),
@@ -1625,6 +1645,7 @@ impl DataStore {
index,
&path,
&mut chunk_lru_cache,
+ None,
status,
worker,
s3_client.as_ref().cloned(),
@@ -1766,11 +1787,14 @@ impl DataStore {
info!("Start GC phase1 (mark used chunks)");
+ let mut digest_map = Some(ReverseDigestMap::default());
+
self.mark_used_chunks(
&mut gc_status,
worker,
gc_cache_capacity,
s3_client.as_ref().cloned(),
+ digest_map.as_mut(),
)
.context("marking used chunks failed")?;
@@ -1796,6 +1820,10 @@ impl DataStore {
None => continue,
};
+ if let Some(map) = digest_map.as_mut() {
+ map.set_raw_chunk_size(&digest, content.size);
+ }
+
let timeout = std::time::Duration::from_secs(0);
let _chunk_guard = match self.inner.chunk_store.lock_chunk(&digest, timeout) {
Ok(guard) => guard,
@@ -1892,6 +1920,7 @@ impl DataStore {
&mut tmp_gc_status,
worker,
self.cache(),
+ None,
)?;
} else {
self.inner.chunk_store.sweep_unused_chunks(
@@ -1900,6 +1929,7 @@ impl DataStore {
&mut gc_status,
worker,
None,
+ digest_map.as_mut(),
)?;
}
@@ -1913,6 +1943,12 @@ impl DataStore {
);
}
}
+
+ if let Some(digest_map) = digest_map.take() {
+ let accumulator = DigestStatAccumulator::default();
+ accumulator.accumulate_and_list(digest_map);
+ }
+
info!(
"Removed garbage: {}",
HumanByte::from(gc_status.removed_bytes),
@@ -2877,3 +2913,9 @@ impl S3DeleteList {
Ok(())
}
}
+
+struct ReverseMap<'a> {
+ digests: &'a mut ReverseDigestMap,
+ namespace: &'a pbs_api_types::BackupNamespace,
+ snapshot: &'a pbs_api_types::BackupDir,
+}
--
2.47.3
_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel