From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 47D551FF13B for ; Tue, 13 Jan 2026 11:23:11 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 69F48142BA; Tue, 13 Jan 2026 11:23:13 +0100 (CET) Date: Tue, 13 Jan 2026 11:23:05 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20251211153835.180405-1-c.ebner@proxmox.com> <20251211153835.180405-2-c.ebner@proxmox.com> In-Reply-To: <20251211153835.180405-2-c.ebner@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.17.0 (https://github.com/astroidmail/astroid) Message-Id: <1768296817.hlvzg291lq.astroid@yuna.none> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1768299744000 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.045 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [datastore.rs, proxmox.com] Subject: Re: [pbs-devel] [PATCH proxmox-backup v2 1/8] GC: Move S3 delete list state and logic to a dedicated struct X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On December 11, 2025 4:38 pm, Christian Ebner wrote: > To better keep track of the state and declutter the code on the > callsites, bundle the S3 delete list and it's logic by a dedicated > struct. Since the check for empty lists is now performed as part > of the deletion related methods, the callsites can get rid of that. > > Further, avoid the proxmox_time::epoch_i64() and use SystemTime and > Duration with their methods directly. > > Suggested-by: Thomas Lamprecht > Signed-off-by: Christian Ebner > --- > pbs-datastore/src/datastore.rs | 132 +++++++++++++++++++-------------- > 1 file changed, 75 insertions(+), 57 deletions(-) > > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index 9c57aaac1..58fd034fc 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -24,7 +24,7 @@ use proxmox_sys::error::SysError; > use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; > use proxmox_sys::linux::procfs::MountInfo; > use proxmox_sys::process_locker::{ProcessLockExclusiveGuard, ProcessLockSharedGuard}; > -use proxmox_time::{epoch_i64, TimeSpan}; we already use epoch_i64 for other calculations in this module, why not keep it? > +use proxmox_time::TimeSpan; > use proxmox_worker_task::WorkerTaskContext; > > use pbs_api_types::{ > @@ -64,7 +64,7 @@ const CHUNK_LOCK_TIMEOUT: Duration = Duration::from_secs(3 * 60 * 60); > // s3 deletion batch size to avoid 1024 open files soft limit > const S3_DELETE_BATCH_LIMIT: usize = 100; > // max defer time for s3 batch deletions > -const S3_DELETE_DEFER_LIMIT_SECONDS: i64 = 60 * 5; > +const S3_DELETE_DEFER_LIMIT_SECONDS: Duration = Duration::from_secs(60 * 5); > > /// checks if auth_id is owner, or, if owner is a token, if > /// auth_id is the user of the token > @@ -1689,40 +1689,8 @@ impl DataStore { > proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None)) > .context("failed to list chunk in s3 object store")?; > > - let mut delete_list = Vec::with_capacity(S3_DELETE_BATCH_LIMIT); > - let mut delete_list_age = epoch_i64(); > - > - let s3_delete_batch = |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>, > - s3_client: &Arc| > - -> Result<(), Error> { > - let delete_objects_result = proxmox_async::runtime::block_on( > - s3_client.delete_objects( > - &delete_list > - .iter() > - .map(|(key, _)| key.clone()) > - .collect::>(), > - ), > - )?; > - if let Some(_err) = delete_objects_result.error { > - bail!("failed to delete some objects"); > - } > - // drops all chunk flock guards > - delete_list.clear(); > - Ok(()) > - }; > - > - let add_to_delete_list = > - |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>, > - delete_list_age: &mut i64, > - key: S3ObjectKey, > - _chunk_guard: BackupLockGuard| { > - // set age based on first insertion > - if delete_list.is_empty() { > - *delete_list_age = epoch_i64(); > - } > - delete_list.push((key, _chunk_guard)); > - }; > - > + let mut delete_list = > + S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS); > loop { > for content in list_bucket_result.contents { > worker.check_abort()?; > @@ -1779,12 +1747,7 @@ impl DataStore { > std::fs::remove_file(chunk_path)?; > } > } > - add_to_delete_list( > - &mut delete_list, > - &mut delete_list_age, > - content.key, > - _chunk_guard, > - ); > + delete_list.push(content.key, _chunk_guard); > Ok(()) > }, > )?; > @@ -1792,12 +1755,7 @@ impl DataStore { > } else { > gc_status.removed_chunks += 1; > gc_status.removed_bytes += content.size; > - add_to_delete_list( > - &mut delete_list, > - &mut delete_list_age, > - content.key, > - _chunk_guard, > - ); > + delete_list.push(content.key, _chunk_guard); > } > > chunk_count += 1; > @@ -1806,12 +1764,7 @@ impl DataStore { > drop(_guard); > > // limit pending deletes to avoid holding too many chunk flocks > - if delete_list.len() >= S3_DELETE_BATCH_LIMIT > - || (!delete_list.is_empty() > - && epoch_i64() - delete_list_age > S3_DELETE_DEFER_LIMIT_SECONDS) > - { > - s3_delete_batch(&mut delete_list, s3_client)?; > - } > + delete_list.conditional_delete_and_drop_locks(s3_client)?; > } > // Process next batch of chunks if there is more > if list_bucket_result.is_truncated { > @@ -1827,9 +1780,7 @@ impl DataStore { > } > > // delete the last batch of objects, if there are any remaining > - if !delete_list.is_empty() { > - s3_delete_batch(&mut delete_list, s3_client)?; > - } > + delete_list.delete_and_drop_locks(s3_client)?; > > info!("processed {chunk_count} total chunks"); > > @@ -2768,3 +2719,70 @@ impl DataStore { > result > } > } > + > +/// Track S3 object keys to be deleted by garbage collection while holding their file lock. > +struct S3DeleteList { > + list: Vec<(S3ObjectKey, BackupLockGuard)>, > + first_entry_added: SystemTime, > + age_threshold: Duration, > + capacity_threshold: usize, > +} > + > +impl S3DeleteList { > + /// Create a new list instance with given capacity and age thresholds. > + fn with_thresholds(capacity_threshold: usize, age_threshold: Duration) -> Self { > + Self { > + first_entry_added: SystemTime::now(), // init only, updated once added > + list: Vec::with_capacity(capacity_threshold), > + age_threshold, > + capacity_threshold, > + } > + } > + > + /// Pushes the current key and backup lock guard to the list, updating the delete list age if > + /// the list was empty before the insert. > + fn push(&mut self, key: S3ObjectKey, guard: BackupLockGuard) { > + // set age based on first insertion > + if self.list.is_empty() { > + self.first_entry_added = SystemTime::now(); > + } > + self.list.push((key, guard)); > + } > + > + /// Delete the objects in the list via the provided S3 client instance. > + /// Clears the list contents and frees the per-chunk file locks. > + fn delete_and_drop_locks(&mut self, s3_client: &Arc) -> Result<(), Error> { > + if self.list.is_empty() { > + return Ok(()); > + } > + let delete_objects_result = proxmox_async::runtime::block_on( > + s3_client.delete_objects( > + &self > + .list > + .iter() > + .map(|(key, _)| key.clone()) > + .collect::>(), > + ), > + )?; > + if delete_objects_result.error.is_some() { > + bail!("failed to delete some objects"); > + } > + // drops all chunk flock guards > + self.list.clear(); > + Ok(()) > + } > + > + /// Delete the object stored in the list if either the list exceeds the capacity threshold or > + /// the delete list age threshold. > + fn conditional_delete_and_drop_locks( > + &mut self, > + s3_client: &Arc, > + ) -> Result<(), Error> { > + if self.list.len() >= self.capacity_threshold > + || (!self.list.is_empty() && self.first_entry_added.elapsed()? > self.age_threshold) this bails if the clock jumps backwards (further than the first_entry_added timestamp). maybe we should instead always delete? or at least add a meaningful error message/context.. > + { > + self.delete_and_drop_locks(s3_client)?; > + } > + Ok(()) > + } > +} > -- > 2.47.3 > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > > > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel