From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup] GC: log progress output for phase 2 on datastores backed by s3
Date: Mon, 15 Dec 2025 14:24:54 +0100 [thread overview]
Message-ID: <20251215132454.628090-1-c.ebner@proxmox.com> (raw)
Currently there is no progress output for phase 2 of garbage
collection on datastores with an S3 backend.
Since spamming of the task log must be avoided and the number of
chunks to be processed cannot be inferred either during phase 1 or
via the S3 API, use a time-based approach for this.
Spawn a new thread for logging and keep track of the processed chunks
via an atomic counter. The progress handle and a channel are used to
stop the logging thread once garbage collection finishes or an abort
is triggered.
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
pbs-datastore/Cargo.toml | 1 +
pbs-datastore/src/datastore.rs | 47 +++++++++++++++++++++++++++++++---
2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
index 8ce930a94..80e06cb0e 100644
--- a/pbs-datastore/Cargo.toml
+++ b/pbs-datastore/Cargo.toml
@@ -38,6 +38,7 @@ proxmox-http.workspace = true
proxmox-human-byte.workspace = true
proxmox-io.workspace = true
proxmox-lang.workspace=true
+proxmox-log.workspace=true
proxmox-s3-client = { workspace = true, features = [ "impl" ] }
proxmox-schema = { workspace = true, features = [ "api-macro" ] }
proxmox-serde = { workspace = true, features = [ "serde_json" ] }
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 9c57aaac1..84866a529 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -3,6 +3,8 @@ use std::io::{self, Write};
use std::os::unix::ffi::OsStrExt;
use std::os::unix::io::AsRawFd;
use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::mpsc::RecvTimeoutError;
use std::sync::{Arc, LazyLock, Mutex};
use std::time::{Duration, SystemTime};
@@ -15,6 +17,7 @@ use tokio::io::AsyncWriteExt;
use tracing::{info, warn};
use proxmox_human_byte::HumanByte;
+use proxmox_log::LogContext;
use proxmox_s3_client::{
S3Client, S3ClientConf, S3ClientOptions, S3ObjectKey, S3PathPrefix, S3RateLimiterOptions,
};
@@ -1682,7 +1685,7 @@ impl DataStore {
info!("Start GC phase2 (sweep unused chunks)");
if let Some(ref s3_client) = s3_client {
- let mut chunk_count = 0;
+ let chunk_count = Arc::new(AtomicUsize::new(0));
let prefix = S3PathPrefix::Some(".chunks/".to_string());
// Operates in batches of 1000 objects max per request
let mut list_bucket_result =
@@ -1692,6 +1695,34 @@ impl DataStore {
let mut delete_list = Vec::with_capacity(S3_DELETE_BATCH_LIMIT);
let mut delete_list_age = epoch_i64();
+ let (abort_tx, abort_rx) = std::sync::mpsc::channel();
+ let counter = Arc::clone(&chunk_count);
+ let log_context = LogContext::current();
+
+ let progress_log_handle = std::thread::spawn(move || {
+ if let Some(log_context) = log_context {
+ log_context.sync_scope(|| {
+ let mut timeout = Duration::from_secs(60);
+ let mut iterations = 0;
+ while let Err(RecvTimeoutError::Timeout) = abort_rx.recv_timeout(timeout) {
+ let count = counter.load(Ordering::SeqCst);
+ info!("sweeped chunks: {count}");
+
+ iterations += 1;
+ if iterations >= 60 && timeout < Duration::from_secs(60 * 60) {
+ timeout = timeout.saturating_mul(60);
+ info!("log output reduced to once per hour");
+ } else if iterations >= 60 * 60
+ && timeout < Duration::from_secs(24 * 60 * 60)
+ {
+ timeout = timeout.saturating_mul(24);
+ info!("log output reduced to once per day");
+ }
+ }
+ })
+ }
+ });
+
let s3_delete_batch = |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
s3_client: &Arc<S3Client>|
-> Result<(), Error> {
@@ -1800,7 +1831,7 @@ impl DataStore {
);
}
- chunk_count += 1;
+ chunk_count.fetch_add(1, Ordering::SeqCst);
// drop guard because of async S3 call below
drop(_guard);
@@ -1831,7 +1862,17 @@ impl DataStore {
s3_delete_batch(&mut delete_list, s3_client)?;
}
- info!("processed {chunk_count} total chunks");
+ abort_tx
+ .send(())
+ .context("failed to abort progress logging")?;
+ progress_log_handle
+ .join()
+ .map_err(|_err| format_err!("failed to join progress log handle"))?;
+
+ info!(
+ "processed {} total chunks",
+ chunk_count.load(Ordering::SeqCst),
+ );
// Phase 2 GC of Filesystem backed storage is phase 3 for S3 backed GC
info!("Start GC phase3 (sweep unused chunk markers)");
--
2.47.3
_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
reply other threads:[~2025-12-15 13:24 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251215132454.628090-1-c.ebner@proxmox.com \
--to=c.ebner@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.