Date: Fri, 18 Jul 2025 10:35:23 +0200
From: Lukas Wagner
To: Proxmox Backup Server development discussion, Christian Ebner
Subject: Re: [pbs-devel] [PATCH proxmox-backup v8 13/45] sync: pull: conditionally upload content to s3 backend

On 2025-07-15 14:53, Christian Ebner wrote:
> If the datastore is backed by an S3 object store, not only insert the
> pulled contents into the local cache store, but also upload them to the
> S3 backend.
>
> Signed-off-by: Christian Ebner
> ---
> changes since version 7:
> - no changes
>
>  src/server/pull.rs | 66 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 63 insertions(+), 3 deletions(-)
>
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index b1724c142..fe87359ab 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -6,8 +6,9 @@ use std::sync::atomic::{AtomicUsize, Ordering};
>  use std::sync::{Arc, Mutex};
>  use std::time::SystemTime;
>  
> -use anyhow::{bail, format_err, Error};
> +use anyhow::{bail, format_err, Context, Error};
>  use proxmox_human_byte::HumanByte;
> +use tokio::io::AsyncReadExt;
>  use tracing::info;
>  
>  use pbs_api_types::{
> @@ -24,7 +25,7 @@ use pbs_datastore::fixed_index::FixedIndexReader;
>  use pbs_datastore::index::IndexFile;
>  use pbs_datastore::manifest::{BackupManifest, FileInfo};
>  use pbs_datastore::read_chunk::AsyncReadChunk;
> -use pbs_datastore::{check_backup_owner, DataStore, StoreProgress};
> +use pbs_datastore::{check_backup_owner, DataStore, DatastoreBackend, StoreProgress};
>  use pbs_tools::sha::sha256;
>  
>  use super::sync::{
> @@ -167,7 +168,20 @@ async fn pull_index_chunks(
>          move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| {
>              // println!("verify and write {}", hex::encode(&digest));
>              chunk.verify_unencrypted(size as usize, &digest)?;
> -            target2.insert_chunk(&chunk, &digest)?;
> +            match target2.backend()? {
> +                DatastoreBackend::Filesystem => {
> +                    target2.insert_chunk(&chunk, &digest)?;
> +                }
> +                DatastoreBackend::S3(s3_client) => {
> +                    let data = chunk.raw_data().to_vec();
> +                    let upload_data = hyper::body::Bytes::from(data);
> +                    let object_key = pbs_datastore::s3::object_key_from_digest(&digest)?;
> +                    let _is_duplicate = proxmox_async::runtime::block_on(
> +                        s3_client.upload_with_retry(object_key, upload_data, false),
> +                    )
> +                    .context("failed to upload chunk to s3 backend")?;
> +                }
> +            }
>              Ok(())
>          },
>      );
> @@ -331,6 +345,18 @@ async fn pull_single_archive<'a>(
>      if let Err(err) = std::fs::rename(&tmp_path, &path) {
>          bail!("Atomic rename file {:?} failed - {}", path, err);
>      }
> +    if let DatastoreBackend::S3(s3_client) = snapshot.datastore().backend()? {
> +        let object_key =
> +            pbs_datastore::s3::object_key_from_path(&snapshot.relative_path(), archive_name)
> +                .context("invalid archive object key")?;
> +
> +        let archive = tokio::fs::File::open(&path).await?;
> +        let mut reader = tokio::io::BufReader::new(archive);
> +        let mut contents = Vec::new();
> +        reader.read_to_end(&mut contents).await?;

You can use tokio::fs::read here

> +        let data = hyper::body::Bytes::from(contents);
> +        let _is_duplicate = s3_client.upload_with_retry(object_key, data, true).await?;

I might do a review of the already merged s3 client code later, but I really
don't like the `replace: bool` parameter for this function very much. I think
I'd prefer having two separate functions for replace vs. not replace (which
might delegate to a common fn internally; there a bool param is fine IMO), or,
alternatively, using an enum instead.

I think personally I'm gravitating more towards the separate functions. What
do you think?
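To illustrate what I mean with the separate functions, a rough, untested
sketch; the `S3Client`/`S3ObjectKey` stubs and all method names below are
placeholders, not the existing client API:

// Rough sketch only: stub types and made-up method names, not the real client.
use anyhow::Error;
use hyper::body::Bytes;

struct S3ObjectKey(String);
struct S3Client;

impl S3Client {
    /// Upload, but keep (and report) an already existing object.
    async fn upload_no_replace_with_retry(
        &self,
        key: S3ObjectKey,
        data: Bytes,
    ) -> Result<bool, Error> {
        self.upload_with_retry_impl(key, data, false).await
    }

    /// Upload, overwriting an already existing object.
    async fn upload_replace_with_retry(
        &self,
        key: S3ObjectKey,
        data: Bytes,
    ) -> Result<bool, Error> {
        self.upload_with_retry_impl(key, data, true).await
    }

    /// Common helper; keeping the `replace: bool` here, away from the call
    /// sites, is fine IMO.
    async fn upload_with_retry_impl(
        &self,
        _key: S3ObjectKey,
        _data: Bytes,
        _replace: bool,
    ) -> Result<bool, Error> {
        // ... existing upload + retry logic ...
        Ok(false)
    }
}

The enum alternative would keep a single public method instead, taking
something like an `UploadMode::{KeepExisting, Replace}` argument, which at
least makes the call sites self-explanatory.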
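And to spell out the tokio::fs::read suggestion above (the same applies to the
client log snippet further down), roughly:

// Untested sketch: tokio::fs::read replaces the manual File::open +
// BufReader + read_to_end combination from the hunk above.
let contents = tokio::fs::read(&path).await?;
let data = hyper::body::Bytes::from(contents);
let _is_duplicate = s3_client.upload_with_retry(object_key, data, true).await?;

If nothing else ends up using it, the newly added `use tokio::io::AsyncReadExt;`
import could then probably be dropped again.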
> +    }
>      Ok(sync_stats)
>  }
>  
> @@ -401,6 +427,7 @@ async fn pull_snapshot<'a>(
>          }
>      }
>  
> +    let manifest_data = tmp_manifest_blob.raw_data().to_vec();
>      let manifest = BackupManifest::try_from(tmp_manifest_blob)?;
>  
>      if ignore_not_verified_or_encrypted(
> @@ -467,9 +494,42 @@
>      if let Err(err) = std::fs::rename(&tmp_manifest_name, &manifest_name) {
>          bail!("Atomic rename file {:?} failed - {}", manifest_name, err);
>      }
> +    if let DatastoreBackend::S3(s3_client) = snapshot.datastore().backend()? {
> +        let object_key = pbs_datastore::s3::object_key_from_path(
> +            &snapshot.relative_path(),
> +            MANIFEST_BLOB_NAME.as_ref(),
> +        )
> +        .context("invalid manifest object key")?;
> +
> +        let data = hyper::body::Bytes::from(manifest_data);
> +        let _is_duplicate = s3_client
> +            .upload_with_retry(object_key, data, true)
> +            .await
> +            .context("failed to upload manifest to s3 backend")?;
> +    }
>  
>      if !client_log_name.exists() {
>          reader.try_download_client_log(&client_log_name).await?;
> +        if client_log_name.exists() {
> +            if let DatastoreBackend::S3(s3_client) = snapshot.datastore().backend()? {
> +                let object_key = pbs_datastore::s3::object_key_from_path(
> +                    &snapshot.relative_path(),
> +                    CLIENT_LOG_BLOB_NAME.as_ref(),
> +                )
> +                .context("invalid archive object key")?;
> +
> +                let log_file = tokio::fs::File::open(&client_log_name).await?;
> +                let mut reader = tokio::io::BufReader::new(log_file);
> +                let mut contents = Vec::new();
> +                reader.read_to_end(&mut contents).await?;

You can use tokio::fs::read(...) here

> +
> +                let data = hyper::body::Bytes::from(contents);
> +                let _is_duplicate = s3_client
> +                    .upload_with_retry(object_key, data, true)
> +                    .await
> +                    .context("failed to upload client log to s3 backend")?;
> +            }
> +        }
>      };
>      snapshot
>          .cleanup_unreferenced_files(&manifest)

--
- Lukas