From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 1/4] GC: Move S3 delete list state and logic to a dedicated struct
Date: Wed, 26 Nov 2025 14:34:16 +0100 [thread overview]
Message-ID: <20251126133419.570874-2-c.ebner@proxmox.com> (raw)
In-Reply-To: <20251126133419.570874-1-c.ebner@proxmox.com>
To better keep track of the state and declutter the code on the
callsites, bundle the S3 delete list and its logic in a dedicated
struct. Since the check for empty lists is now performed as part
of the deletion-related methods, the callsites can get rid of it.
Further, avoid the proxmox_time::epoch_i64() and use SystemTime and
Duration with their methods directly.
Suggested-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
pbs-datastore/src/datastore.rs | 132 +++++++++++++++++++--------------
1 file changed, 75 insertions(+), 57 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 36550ff63..a0890340b 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -24,7 +24,7 @@ use proxmox_sys::error::SysError;
use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
use proxmox_sys::linux::procfs::MountInfo;
use proxmox_sys::process_locker::{ProcessLockExclusiveGuard, ProcessLockSharedGuard};
-use proxmox_time::{epoch_i64, TimeSpan};
+use proxmox_time::TimeSpan;
use proxmox_worker_task::WorkerTaskContext;
use pbs_api_types::{
@@ -64,7 +64,7 @@ const CHUNK_LOCK_TIMEOUT: Duration = Duration::from_secs(3 * 60 * 60);
// s3 deletion batch size to avoid 1024 open files soft limit
const S3_DELETE_BATCH_LIMIT: usize = 100;
// max defer time for s3 batch deletions
-const S3_DELETE_DEFER_LIMIT_SECONDS: i64 = 60 * 5;
+const S3_DELETE_DEFER_LIMIT_SECONDS: Duration = Duration::from_secs(60 * 5);
/// checks if auth_id is owner, or, if owner is a token, if
/// auth_id is the user of the token
@@ -1704,40 +1704,8 @@ impl DataStore {
proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None))
.context("failed to list chunk in s3 object store")?;
- let mut delete_list = Vec::with_capacity(S3_DELETE_BATCH_LIMIT);
- let mut delete_list_age = epoch_i64();
-
- let s3_delete_batch = |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
- s3_client: &Arc<S3Client>|
- -> Result<(), Error> {
- let delete_objects_result = proxmox_async::runtime::block_on(
- s3_client.delete_objects(
- &delete_list
- .iter()
- .map(|(key, _)| key.clone())
- .collect::<Vec<S3ObjectKey>>(),
- ),
- )?;
- if let Some(_err) = delete_objects_result.error {
- bail!("failed to delete some objects");
- }
- // drops all chunk flock guards
- delete_list.clear();
- Ok(())
- };
-
- let add_to_delete_list =
- |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
- delete_list_age: &mut i64,
- key: S3ObjectKey,
- _chunk_guard: BackupLockGuard| {
- // set age based on first insertion
- if delete_list.is_empty() {
- *delete_list_age = epoch_i64();
- }
- delete_list.push((key, _chunk_guard));
- };
-
+ let mut delete_list =
+ S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS);
loop {
for content in list_bucket_result.contents {
let (chunk_path, digest, bad) =
@@ -1792,12 +1760,7 @@ impl DataStore {
std::fs::remove_file(chunk_path)?;
}
}
- add_to_delete_list(
- &mut delete_list,
- &mut delete_list_age,
- content.key,
- _chunk_guard,
- );
+ delete_list.push(content.key, _chunk_guard);
Ok(())
},
)?;
@@ -1805,12 +1768,7 @@ impl DataStore {
} else {
gc_status.removed_chunks += 1;
gc_status.removed_bytes += content.size;
- add_to_delete_list(
- &mut delete_list,
- &mut delete_list_age,
- content.key,
- _chunk_guard,
- );
+ delete_list.push(content.key, _chunk_guard);
}
chunk_count += 1;
@@ -1819,12 +1777,7 @@ impl DataStore {
drop(_guard);
// limit pending deletes to avoid holding too many chunk flocks
- if delete_list.len() >= S3_DELETE_BATCH_LIMIT
- || (!delete_list.is_empty()
- && epoch_i64() - delete_list_age > S3_DELETE_DEFER_LIMIT_SECONDS)
- {
- s3_delete_batch(&mut delete_list, s3_client)?;
- }
+ delete_list.conditional_delete_and_drop_locks(s3_client)?;
}
// Process next batch of chunks if there is more
if list_bucket_result.is_truncated {
@@ -1840,9 +1793,7 @@ impl DataStore {
}
// delete the last batch of objects, if there are any remaining
- if !delete_list.is_empty() {
- s3_delete_batch(&mut delete_list, s3_client)?;
- }
+ delete_list.delete_and_drop_locks(s3_client)?;
info!("processed {chunk_count} total chunks");
@@ -2781,3 +2732,70 @@ impl DataStore {
result
}
}
+
+/// Track S3 object keys to be deleted by garbage collection while holding their file lock.
+struct S3DeleteList {
+    list: Vec<(S3ObjectKey, BackupLockGuard)>, // pending batch; guards keep the per-chunk flocks alive
+    first_entry_added: SystemTime,             // insertion time of the first entry in the current batch
+    age_threshold: Duration,                   // max defer time before a partial batch is flushed
+    capacity_threshold: usize,                 // max batch size before a flush is forced
+}
+
+impl S3DeleteList {
+    /// Create a new list instance with given capacity and age thresholds.
+    fn with_thresholds(capacity_threshold: usize, age_threshold: Duration) -> Self {
+        Self {
+            first_entry_added: SystemTime::now(), // init only, updated once added
+            list: Vec::with_capacity(capacity_threshold),
+            age_threshold,
+            capacity_threshold,
+        }
+    }
+
+    /// Pushes the current key and backup lock guard to the list, updating the delete list age if
+    /// the list was empty before the insert.
+    fn push(&mut self, key: S3ObjectKey, guard: BackupLockGuard) {
+        // set age based on first insertion
+        if self.list.is_empty() {
+            self.first_entry_added = SystemTime::now();
+        }
+        self.list.push((key, guard));
+    }
+
+    /// Delete the objects in the list via the provided S3 client instance.
+    /// Clears the list contents and frees the per-chunk file locks.
+    fn delete_and_drop_locks(&mut self, s3_client: &Arc<S3Client>) -> Result<(), Error> {
+        if self.list.is_empty() {
+            return Ok(());
+        }
+        let delete_objects_result = proxmox_async::runtime::block_on(
+            s3_client.delete_objects(
+                &self
+                    .list
+                    .iter()
+                    .map(|(key, _)| key.clone())
+                    .collect::<Vec<S3ObjectKey>>(),
+            ),
+        )?;
+        if delete_objects_result.error.is_some() {
+            bail!("failed to delete some objects");
+        }
+        // drops all chunk flock guards
+        self.list.clear();
+        Ok(())
+    }
+
+    /// Delete the objects stored in the list if either the list length exceeds the capacity
+    /// threshold or the age of the oldest entry exceeds the age threshold.
+    fn conditional_delete_and_drop_locks(
+        &mut self,
+        s3_client: &Arc<S3Client>,
+    ) -> Result<(), Error> {
+        // NOTE(review): `elapsed()?` errors if the system clock moved backwards — TODO confirm acceptable
+        if self.list.len() >= self.capacity_threshold
+            || (!self.list.is_empty() && self.first_entry_added.elapsed()? > self.age_threshold)
+        {
+            self.delete_and_drop_locks(s3_client)?;
+        }
+        Ok(())
+    }
+}
--
2.47.3
_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
next prev parent reply other threads:[~2025-11-26 13:34 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-26 13:34 [pbs-devel] [PATCH proxmox-backup 0/4] followups for garbage collection Christian Ebner
2025-11-26 13:34 ` Christian Ebner [this message]
2025-11-26 13:34 ` [pbs-devel] [PATCH proxmox-backup 2/4] chunk store: rename and limit scope for chunk store iterator Christian Ebner
2025-11-26 13:34 ` [pbs-devel] [PATCH proxmox-backup 3/4] chunk store: invert chunk filename checks in " Christian Ebner
2025-11-26 13:34 ` [pbs-devel] [PATCH proxmox-backup 4/4] chunk store: return chunk extension and check for used marker Christian Ebner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251126133419.570874-2-c.ebner@proxmox.com \
--to=c.ebner@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox