From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 21 Jan 2026 11:45:37 +0100
Message-ID: <20260121104537.495434-3-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.47.3
In-Reply-To: <20260121104537.495434-1-c.ebner@proxmox.com>
References: <20260121104537.495434-1-c.ebner@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup v2 1/1] GC: log progress output for phase 2 on datastores backed by s3

Currently there is no progress output for phase 2 of garbage collection
on datastores with an S3 backend. To avoid spamming the task log,
introduce a rate limited logger which reduces the log frequency the
longer the phase runs, bounding the number of output lines:

- 30 seconds within the first hour: at most 120 lines
- 1 minute for the [1..3] hour range: at most 60 lines per hour
- 5 minutes for the [3..24] hour range: at most 12 lines per hour
- 15 minutes after the first 24 hours: at most 96 lines per day

List request statistics are gathered and displayed as well, to provide
basic insight into S3 API performance.

Example output:
```
...
Start GC phase2 (sweep unused chunks)
2961 chunks processed, removing 0 B in 0 chunks, 0 pending (3 total list requests, 4 ms average)
5961 chunks processed, removing 0 B in 0 chunks, 0 pending (6 total list requests, 1 ms average)
8949 chunks processed, removing 0 B in 0 chunks, 0 pending (9 total list requests, 1 ms average)
processed 9125 chunks, 0 B in 0 chunks deleted
...
```

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
changes since version 1:
- switch from dedicated thread for logging to inline rate limited logger
- adapt log rate intervals as suggested
- extend statistics
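
Not part of the patch, but perhaps handy for review: the rate limiting
scheme can be exercised in isolation. Below is a minimal standalone
sketch; `println!` stands in for tracing's `info!`, and the interval
schedule is folded into a hypothetical `adapt_interval()` helper, where
the patch instead inlines an equivalent `match` in the GC loop:

```
use std::thread::sleep;
use std::time::{Duration, Instant};

// Standalone mirror of the patch's RateLimitedLogger.
struct RateLimitedLogger {
    interval: Duration,
    start_time: Instant,
    last_log_time: Instant,
}

impl RateLimitedLogger {
    fn with_interval(interval: Duration) -> Self {
        let now = Instant::now();
        Self { interval, start_time: now, last_log_time: now }
    }

    // Interval schedule from the commit message (the actual patch
    // inlines this as a `match` in the GC loop).
    fn adapt_interval(&mut self) {
        const HOUR: u64 = 60 * 60;
        self.interval = match self.start_time.elapsed().as_secs() {
            s if s >= 24 * HOUR => Duration::from_secs(15 * 60), // <= 96 lines/day
            s if s >= 3 * HOUR => Duration::from_secs(5 * 60),   // <= 12 lines/hour
            s if s >= HOUR => Duration::from_secs(60),           // <= 60 lines/hour
            _ => Duration::from_secs(30),                        // <= 120 lines, first hour
        };
    }

    // Invoke the callback and print its result at most once per interval.
    fn log_with<F: Fn() -> String>(&mut self, callback: F) {
        let now = Instant::now();
        if now.duration_since(self.last_log_time) >= self.interval {
            println!("{}", callback());
            self.last_log_time = now;
        }
    }
}

fn main() {
    let mut logger = RateLimitedLogger::with_interval(Duration::from_secs(30));
    for chunk_count in 0u64.. {
        logger.adapt_interval();
        logger.log_with(|| format!("{chunk_count} chunks processed"));
        sleep(Duration::from_millis(100)); // simulate per-batch work
    }
}
```

Run for a few minutes, this prints one line roughly every 30 seconds;
left running, the output thins out according to the schedule above.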
 pbs-datastore/src/datastore.rs | 100 ++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 2 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 7ad3d917d..51dcde3d8 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -4,11 +4,12 @@ use std::os::unix::ffi::OsStrExt;
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, LazyLock, Mutex};
-use std::time::{Duration, SystemTime};
+use std::time::{Duration, Instant, SystemTime};
 
 use anyhow::{bail, format_err, Context, Error};
 use http_body_util::BodyExt;
 use hyper::body::Bytes;
+use hyper::http::method::Method;
 use nix::unistd::{unlinkat, UnlinkatFlags};
 use pbs_tools::lru_cache::LruCache;
 use tokio::io::AsyncWriteExt;
@@ -1778,14 +1779,21 @@ impl DataStore {
         if let Some(ref s3_client) = s3_client {
             let mut chunk_count = 0;
+            let mut list_request_count = 0;
             let prefix = S3PathPrefix::Some(".chunks/".to_string());
             // Operates in batches of 1000 objects max per request
             let mut list_bucket_result =
                 proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None))
                     .context("failed to list chunk in s3 object store")?;
+            let mut total_request_duration =
+                s3_client.last_request_duration().unwrap_or_default();
+            let mut average_request_duration = total_request_duration;
+            list_request_count += 1;
 
             let mut delete_list =
                 S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS);
 
+            let mut rate_limited_logger = RateLimitedLogger::with_interval(Duration::from_secs(30));
+
             loop {
                 for content in list_bucket_result.contents {
                     worker.check_abort()?;
@@ -1861,6 +1869,35 @@ impl DataStore {
                     // limit pending deletes to avoid holding too many chunk flocks
                     delete_list.conditional_delete_and_drop_locks(s3_client)?;
                 }
+
+                match rate_limited_logger.elapsed_since_creation() {
+                    e if e >= Duration::from_secs(24 * 60 * 60) => {
+                        // max 96 lines per additional day
+                        rate_limited_logger.update_interval(Duration::from_secs(15 * 60))
+                    }
+                    e if e >= Duration::from_secs(3 * 60 * 60) => {
+                        // max 12 lines per hour for the [3..24] hour range
+                        rate_limited_logger.update_interval(Duration::from_secs(5 * 60))
+                    }
+                    e if e >= Duration::from_secs(60 * 60) => {
+                        // max 60 lines per hour for the [1..3] hour range
+                        rate_limited_logger.update_interval(Duration::from_secs(60))
+                    }
+                    // max 120 lines for first hour (with initial 30s log interval)
+                    _ => (),
+                }
+
+                rate_limited_logger.log_with(|| {
+                    format!(
+                        "{chunk_count} chunks processed, removing {} in {} chunks, {} pending ({} total list requests, {} ms average)",
+                        HumanByte::from(gc_status.removed_bytes),
+                        gc_status.removed_chunks,
+                        delete_list.len(),
+                        s3_client.request_count(Method::GET),
+                        average_request_duration.as_millis(),
+                    )
+                });
+
                 // Process next batch of chunks if there is more
                 if list_bucket_result.is_truncated {
                     list_bucket_result =
@@ -1868,6 +1905,15 @@ impl DataStore {
                             &prefix,
                             list_bucket_result.next_continuation_token.as_deref(),
                         ))?;
+                    list_request_count += 1;
+                    if let Some(duration) = s3_client.last_request_duration() {
+                        // keep a running sum so the reported average stays exact
+                        total_request_duration += duration;
+                        average_request_duration = total_request_duration
+                            .checked_div(list_request_count)
+                            .unwrap_or_default();
+                    }
+
                     continue;
                 }
 
@@ -1877,7 +1923,11 @@ impl DataStore {
             // delete the last batch of objects, if there are any remaining
             delete_list.delete_and_drop_locks(s3_client)?;
 
-            info!("processed {chunk_count} total chunks");
+            info!(
+                "processed {chunk_count} chunks, {} in {} chunks deleted",
+                HumanByte::from(gc_status.removed_bytes),
+                gc_status.removed_chunks,
+            );
 
             // Phase 2 GC of Filesystem backed storage is phase 3 for S3 backed GC
             info!("Start GC phase3 (sweep unused chunk markers)");
@@ -2876,4 +2926,50 @@ impl S3DeleteList {
         }
         Ok(())
     }
+
+    /// Current number of object keys pending deletion
+    fn len(&self) -> usize {
+        self.list.len()
+    }
+}
+
+struct RateLimitedLogger {
+    interval: Duration,
+    start_time: Instant,
+    last_log_time: Instant,
+}
+
+impl RateLimitedLogger {
+    /// Create a rate limited logger with the given interval as maximum log frequency.
+    fn with_interval(interval: Duration) -> Self {
+        let now = Instant::now();
+        Self {
+            interval,
+            start_time: now,
+            last_log_time: now,
+        }
+    }
+
+    /// Set the maximum log frequency to the given interval.
+    fn update_interval(&mut self, interval: Duration) {
+        self.interval = interval;
+    }
+
+    /// Returns the age of the logger instance as time elapsed since creation.
+    fn elapsed_since_creation(&self) -> Duration {
+        self.start_time.elapsed()
+    }
+
+    /// Executes the provided callback if the time since the last invocation
+    /// exceeds the configured interval, logging the returned value.
+    fn log_with<F>(&mut self, callback: F)
+    where
+        F: Fn() -> String,
+    {
+        let now = Instant::now();
+        if now.duration_since(self.last_log_time) >= self.interval {
+            info!("{}", callback());
+            self.last_log_time = now;
+        }
+    }
 }
-- 
2.47.3


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel