From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 6A26D1FF165 for ; Thu, 3 Jul 2025 15:25:01 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 5B43A156BD; Thu, 3 Jul 2025 15:25:42 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Thu, 3 Jul 2025 15:18:30 +0200 Message-ID: <20250703131837.786811-43-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.2 In-Reply-To: <20250703131837.786811-1-c.ebner@proxmox.com> References: <20250703131837.786811-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.080 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.237 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[pull.rs] Subject: [pbs-devel] [PATCH proxmox-backup v5 39/46] api: backup: use local datastore cache on s3 backend chunk upload X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Take advantage of the local datastore cache to avoid re-uploading of already known chunks. This not only helps improve the backup/upload speeds, but also avoids additional costs by reducing the number of requests and transferred payload data to the S3 object store api. If the cache is present, look up if it contains the chunk, skipping upload altogether if it is. Otherwise, load the chunk into memory, upload it to the S3 object store api and insert it into the local datastore cache. Signed-off-by: Christian Ebner --- src/api2/backup/upload_chunk.rs | 36 ++++++++++++++++++++++++++++++--- src/server/pull.rs | 4 ++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/api2/backup/upload_chunk.rs b/src/api2/backup/upload_chunk.rs index 5dd35b435..ea306a8d6 100644 --- a/src/api2/backup/upload_chunk.rs +++ b/src/api2/backup/upload_chunk.rs @@ -2,7 +2,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use anyhow::{bail, format_err, Error}; +use anyhow::{bail, format_err, Context as AnyhowContext, Error}; use futures::*; use hex::FromHex; use http_body_util::{BodyDataStream, BodyExt}; @@ -262,9 +262,39 @@ async fn upload_to_backend( ); } + // Avoid re-upload to S3 if the chunk is either present in the LRU cache or the chunk + // file exists on filesystem. The latter means that the chunk has been present in the + // past and was not cleaned up by garbage collection, so contained in the S3 object store. 
+ if env.datastore.cache_contains(&digest) { + tracing::info!("Skip upload of cached chunk {}", hex::encode(digest)); + return Ok((digest, size, encoded_size, true)); + } + if let Ok(true) = env.datastore.cond_touch_chunk(&digest, false) { + tracing::info!( + "Skip upload of already encountered chunk {}", + hex::encode(digest) + ); + return Ok((digest, size, encoded_size, true)); + } + + tracing::info!("Upload of new chunk {}", hex::encode(digest)); let is_duplicate = s3_client - .upload_with_retry(digest.into(), data, false) - .await?; + .upload_with_retry(digest.into(), data.clone(), false) + .await + .context("failed to upload chunk to s3 backend")?; + + // Only insert the chunk into the cache after it has been successfully uploaded. + // Although less performant than doing this in parallel, it is required for consistency + // since chunks are considered as present on the backend if the file exists in the local + // cache store. + let datastore = env.datastore.clone(); + tracing::info!("Caching of chunk {}", hex::encode(digest)); + let _ = tokio::task::spawn_blocking(move || { + let chunk = DataBlob::from_raw(data.to_vec())?; + datastore.cache_insert(&digest, &chunk) + }) + .await?; + Ok((digest, size, encoded_size, is_duplicate)) } } diff --git a/src/server/pull.rs b/src/server/pull.rs index ec9518a47..5e99c4af8 100644 --- a/src/server/pull.rs +++ b/src/server/pull.rs @@ -173,6 +173,10 @@ async fn pull_index_chunks( target2.insert_chunk(&chunk, &digest)?; } DatastoreBackend::S3(s3_client) => { + if target2.cache_contains(&digest) { + return Ok(()); + } + target2.cache_insert(&digest, &chunk)?; let data = chunk.raw_data().to_vec(); let upload_data = hyper::body::Bytes::from(data); let _is_duplicate = proxmox_async::runtime::block_on( -- 2.47.2 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel