From: "Hannes Laimer" <h.laimer@proxmox.com>
To: "Proxmox Backup Server development discussion"
<pbs-devel@lists.proxmox.com>
Cc: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup v9 17/46] verify: implement chunk verification for stores with s3 backend
Date: Mon, 21 Jul 2025 15:35:31 +0200 [thread overview]
Message-ID: <DBHRTZDCNF0D.3DZ4UC82WIHZX@proxmox.com> (raw)
In-Reply-To: <20250719125035.9926-21-c.ebner@proxmox.com>
On Sat Jul 19, 2025 at 2:50 PM CEST, Christian Ebner wrote:
> For datastores backed by an S3 compatible object store, rather than
> reading the chunks to be verified from the local filesystem, fetch
> them via the s3 client from the configured bucket.
>
Could we somehow take advantage of the hash S3 provides for objects[1]?
We can't use our hashes though, so I'm not sure how we would...
[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
> changes since version 8:
> - refactored corrupt chunk marking into more compact methods
>
> src/backup/verify.rs | 118 +++++++++++++++++++++++++++++++++++++------
> 1 file changed, 103 insertions(+), 15 deletions(-)
>
> diff --git a/src/backup/verify.rs b/src/backup/verify.rs
> index dea10f618..9a2148b56 100644
> --- a/src/backup/verify.rs
> +++ b/src/backup/verify.rs
> @@ -5,6 +5,7 @@ use std::sync::{Arc, Mutex};
> use std::time::Instant;
>
> use anyhow::{bail, Error};
> +use http_body_util::BodyExt;
> use tracing::{error, info, warn};
>
> use proxmox_worker_task::WorkerTaskContext;
> @@ -15,7 +16,7 @@ use pbs_api_types::{
> UPID,
> };
> use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo};
> -use pbs_datastore::index::IndexFile;
> +use pbs_datastore::index::{ChunkReadInfo, IndexFile};
> use pbs_datastore::manifest::{BackupManifest, FileInfo};
> use pbs_datastore::{DataBlob, DataStore, DatastoreBackend, StoreProgress};
>
> @@ -89,6 +90,38 @@ impl VerifyWorker {
> }
> }
>
> + if let Ok(DatastoreBackend::S3(s3_client)) = datastore.backend() {
> + let suffix = format!(".{}.bad", counter);
> + let target_key =
> + match pbs_datastore::s3::object_key_from_digest_with_suffix(digest, &suffix) {
> + Ok(target_key) => target_key,
> + Err(err) => {
> + info!("could not generate target key for corrupted chunk {path:?} - {err}");
> + return;
> + }
> + };
> + let object_key = match pbs_datastore::s3::object_key_from_digest(digest) {
> + Ok(object_key) => object_key,
> + Err(err) => {
> + info!("could not generate object key for corrupted chunk {path:?} - {err}");
> + return;
> + }
> + };
> + if proxmox_async::runtime::block_on(
> + s3_client.copy_object(object_key.clone(), target_key),
> + )
> + .is_ok()
> + {
> + if proxmox_async::runtime::block_on(s3_client.delete_object(object_key)).is_err() {
> + info!("failed to delete corrupt chunk on s3 backend: {digest_str}");
> + }
> + } else {
> + info!("failed to copy corrupt chunk on s3 backend: {digest_str}");
> + }
> + } else {
> + info!("failed to get s3 backend while trying to rename bad chunk: {digest_str}");
> + }
> +
> match std::fs::rename(&path, &new_path) {
> Ok(_) => {
> info!("corrupted chunk renamed to {:?}", &new_path);
> @@ -189,20 +222,13 @@ impl VerifyWorker {
> continue; // already verified or marked corrupt
> }
>
> - match self.datastore.load_chunk(&info.digest) {
> - Err(err) => {
> - self.corrupt_chunks.lock().unwrap().insert(info.digest);
> - error!("can't verify chunk, load failed - {err}");
> - errors.fetch_add(1, Ordering::SeqCst);
> - Self::rename_corrupted_chunk(self.datastore.clone(), &info.digest);
> - }
> - Ok(chunk) => {
> - let size = info.size();
> - read_bytes += chunk.raw_size();
> - decoder_pool.send((chunk, info.digest, size))?;
> - decoded_bytes += size;
> - }
> - }
> + self.verify_chunk_by_backend(
> + &info,
> + &mut read_bytes,
> + &mut decoded_bytes,
> + Arc::clone(&errors),
> + &decoder_pool,
> + )?;
> }
>
> decoder_pool.complete()?;
> @@ -228,6 +254,68 @@ impl VerifyWorker {
> Ok(())
> }
>
> + fn verify_chunk_by_backend(
> + &self,
> + info: &ChunkReadInfo,
> + read_bytes: &mut u64,
> + decoded_bytes: &mut u64,
> + errors: Arc<AtomicUsize>,
> + decoder_pool: &ParallelHandler<(DataBlob, [u8; 32], u64)>,
> + ) -> Result<(), Error> {
> + match &self.backend {
> + DatastoreBackend::Filesystem => match self.datastore.load_chunk(&info.digest) {
> + Err(err) => self.add_corrupt_chunk(
> + info.digest,
> + errors,
> + &format!("can't verify chunk, load failed - {err}"),
> + ),
> + Ok(chunk) => {
> + let size = info.size();
> + *read_bytes += chunk.raw_size();
> + decoder_pool.send((chunk, info.digest, size))?;
> + *decoded_bytes += size;
> + }
> + },
> + DatastoreBackend::S3(s3_client) => {
> + let object_key = pbs_datastore::s3::object_key_from_digest(&info.digest)?;
> + match proxmox_async::runtime::block_on(s3_client.get_object(object_key)) {
> + Ok(Some(response)) => {
> + let bytes = proxmox_async::runtime::block_on(response.content.collect())?
> + .to_bytes();
> + let chunk = DataBlob::from_raw(bytes.to_vec())?;
> + let size = info.size();
> + *read_bytes += chunk.raw_size();
> + decoder_pool.send((chunk, info.digest, size))?;
> + *decoded_bytes += size;
> + }
> + Ok(None) => self.add_corrupt_chunk(
> + info.digest,
> + errors,
> + &format!(
> + "can't verify missing chunk with digest {}",
> + hex::encode(info.digest)
> + ),
> + ),
> + Err(err) => self.add_corrupt_chunk(
> + info.digest,
> + errors,
> + &format!("can't verify chunk, load failed - {err}"),
> + ),
> + }
> + }
> + }
> + Ok(())
> + }
> +
> + fn add_corrupt_chunk(&self, digest: [u8; 32], errors: Arc<AtomicUsize>, message: &str) {
> + // Panic on poisoned mutex
> + let mut corrupt_chunks = self.corrupt_chunks.lock().unwrap();
> + corrupt_chunks.insert(digest);
> + error!(message);
> + errors.fetch_add(1, Ordering::SeqCst);
> + Self::rename_corrupted_chunk(self.datastore.clone(), &digest);
> + }
> +
> fn verify_fixed_index(&self, backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> {
> let mut path = backup_dir.relative_path();
> path.push(&info.filename);
_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
next prev parent reply other threads:[~2025-07-21 13:34 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-19 12:49 [pbs-devel] [PATCH proxmox{, -backup} v9 00/49] fix #2943: S3 storage backend for datastores Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox v9 1/3] pbs-api-types: extend datastore config by backend config enum Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox v9 2/3] pbs-api-types: maintenance: add new maintenance mode S3 refresh Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox v9 3/3] s3 client: wrap upload with retry into dedicated methods Christian Ebner
2025-07-21 15:37 ` [pbs-devel] applied: " Thomas Lamprecht
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 01/46] datastore: add helpers for path/digest to s3 object key conversion Christian Ebner
2025-07-21 12:29 ` Hannes Laimer
2025-07-21 12:51 ` Christian Ebner
2025-07-21 12:55 ` Hannes Laimer
2025-07-21 13:58 ` Hannes Laimer
2025-07-21 14:15 ` Christian Ebner
2025-07-21 14:20 ` Hannes Laimer
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 02/46] config: introduce s3 object store client configuration Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 03/46] api: config: implement endpoints to manipulate and list s3 configs Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 04/46] api: datastore: check s3 backend bucket access on datastore create Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 05/46] api/cli: add endpoint and command to check s3 client connection Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 06/46] datastore: allow to get the backend for a datastore Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 07/46] api: backup: store datastore backend in runtime environment Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 08/46] api: backup: conditionally upload chunks to s3 object store backend Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 09/46] api: backup: conditionally upload blobs " Christian Ebner
2025-07-19 12:49 ` [pbs-devel] [PATCH proxmox-backup v9 10/46] api: backup: conditionally upload indices " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 11/46] api: backup: conditionally upload manifest " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 12/46] api: datastore: conditionally upload client log to s3 backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 13/46] sync: pull: conditionally upload content " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 14/46] api: reader: fetch chunks based on datastore backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 15/46] datastore: local chunk reader: read chunks based on backend Christian Ebner
2025-07-21 13:12 ` Hannes Laimer
2025-07-21 13:24 ` Christian Ebner
2025-07-21 13:36 ` Lukas Wagner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 16/46] verify worker: add datastore backed to verify worker Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 17/46] verify: implement chunk verification for stores with s3 backend Christian Ebner
2025-07-21 13:35 ` Hannes Laimer [this message]
2025-07-21 13:38 ` Christian Ebner
2025-07-21 13:55 ` Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 18/46] datastore: create namespace marker in " Christian Ebner
2025-07-21 13:52 ` Hannes Laimer
2025-07-21 14:01 ` Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 19/46] datastore: create/delete protected marker file on s3 storage backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 20/46] datastore: prune groups/snapshots from s3 object store backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 21/46] datastore: get and set owner for s3 " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 22/46] datastore: implement garbage collection for s3 backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 23/46] ui: add datastore type selector and reorganize component layout Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 24/46] ui: add s3 client edit window for configuration create/edit Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 25/46] ui: add s3 client view for configuration Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 26/46] ui: expose the s3 client view in the navigation tree Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 27/46] ui: add s3 client selector and bucket field for s3 backend setup Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 28/46] tools: lru cache: add removed callback for evicted cache nodes Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 29/46] tools: async lru cache: implement insert, remove and contains methods Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 30/46] datastore: add local datastore cache for network attached storages Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 31/46] api: backup: use local datastore cache on s3 backend chunk upload Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 32/46] api: reader: use local datastore cache on s3 backend chunk fetching Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 33/46] datastore: local chunk reader: get cached chunk from local cache store Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 34/46] backup writer: refactor parameters into backup writer options struct Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 35/46] api: backup: add no-cache flag to bypass local datastore cache Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 36/46] api/datastore: implement refresh endpoint for stores with s3 backend Christian Ebner
2025-07-21 14:16 ` Hannes Laimer
2025-07-21 14:26 ` Christian Ebner
2025-07-21 14:31 ` Hannes Laimer
2025-07-21 14:42 ` Christian Ebner
2025-07-21 14:48 ` Hannes Laimer
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 37/46] cli: add dedicated subcommand for datastore s3 refresh Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 38/46] ui: render s3 refresh as valid maintenance type and task description Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 39/46] ui: expose s3 refresh button for datastores backed by object store Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 40/46] datastore: conditionally upload atime marker chunk to s3 backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 41/46] bin: implement client subcommands for s3 configuration manipulation Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 42/46] bin: expose reuse-datastore flag for proxmox-backup-manager Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 43/46] datastore: mark store as in-use by setting marker on s3 backend Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 44/46] datastore: run s3-refresh when reusing a datastore with " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 45/46] api/ui: add flag to allow overwriting in-use marker for " Christian Ebner
2025-07-19 12:50 ` [pbs-devel] [PATCH proxmox-backup v9 46/46] docs: Add section describing how to setup s3 backed datastore Christian Ebner
2025-07-21 14:24 ` [pbs-devel] [PATCH proxmox{, -backup} v9 00/49] fix #2943: S3 storage backend for datastores Hannes Laimer
2025-07-21 15:05 ` Lukas Wagner
2025-07-21 15:37 ` Christian Ebner
2025-07-21 16:46 ` Christian Ebner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DBHRTZDCNF0D.3DZ4UC82WIHZX@proxmox.com \
--to=h.laimer@proxmox.com \
--cc=pbs-devel-bounces@lists.proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox