From mboxrd@z Thu Jan 1 00:00:00 1970
From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Wed, 8 Oct 2025 17:21:24 +0200
Message-ID: <20251008152125.849216-12-c.ebner@proxmox.com>
X-Mailer: git-send-email 2.47.3
In-Reply-To: <20251008152125.849216-1-c.ebner@proxmox.com>
References: <20251008152125.849216-1-c.ebner@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup v2 11/12] api: chunk upload: fix race with garbage collection for no-cache on s3

Chunks uploaded to the s3 backend with the no-cache flag set are never
inserted into the local datastore cache. Garbage collection, however,
requires the chunk marker file to be present, otherwise it cleans up
the chunks. While phase 1 of garbage collection creates the marker
files for indexed chunks, it does not do so for chunks of in-progress
backups running with the no-cache flag.

Therefore, mark chunks as in-progress while they are being uploaded,
just as in the regular cached mode, but replace that marker with the
zero-sized chunk marker file once the upload has finished, to avoid
incorrect cleanup by garbage collection.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
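For context, below is a minimal standalone sketch of the marker lifecycle
this patch establishes for the no-cache path. All names, paths and the
simulated upload closure are invented for illustration; the real code
additionally holds the chunk store mutex and, on rename failure, re-checks
whether a concurrent upload already inserted the chunk:

use std::fs;
use std::io;
use std::path::{Path, PathBuf};

// Hypothetical layout: "<digest>.upload" marks an in-progress backend
// upload; a plain zero-sized "<digest>" file is the chunk marker that
// garbage collection treats as "chunk present".
fn marker_paths(store: &Path, digest_str: &str) -> (PathBuf, PathBuf) {
    (
        store.join(format!("{digest_str}.upload")),
        store.join(digest_str),
    )
}

fn upload_no_cache(
    store: &Path,
    digest_str: &str,
    upload: impl Fn() -> io::Result<()>,
) -> io::Result<()> {
    let (upload_marker, chunk_marker) = marker_paths(store, digest_str);

    // 1. Mark the chunk as in-progress so garbage collection keeps it.
    fs::File::create(&upload_marker)?;

    // 2. Upload to the backend; on failure remove the marker again,
    //    mirroring cleanup_backend_upload_marker().
    if let Err(err) = upload() {
        let _ = fs::remove_file(&upload_marker);
        return Err(err);
    }

    // 3. Persist: atomically rename the in-progress marker into the
    //    zero-sized chunk marker, mirroring persist_backend_upload_marker().
    fs::rename(&upload_marker, &chunk_marker)
}

fn main() -> io::Result<()> {
    let store = std::env::temp_dir().join("upload-marker-demo");
    fs::create_dir_all(&store)?;
    upload_no_cache(&store, "0123abcd", || Ok(()))?;
    assert!(store.join("0123abcd").exists());
    Ok(())
}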
 pbs-datastore/src/chunk_store.rs | 23 +++++++++++++++++++++++
 pbs-datastore/src/datastore.rs   |  7 +++++++
 src/api2/backup/upload_chunk.rs  | 14 ++++++++++++--
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index 5b1f397bd..323ba06e6 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -598,6 +598,29 @@ impl ChunkStore {
         Ok(true)
     }
 
+    pub(crate) fn persist_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
+        if self.datastore_backend_type == DatastoreBackendType::Filesystem {
+            bail!("cannot create backend upload marker, not a cache store");
+        }
+        let (marker_path, _digest_str) = self.chunk_backed_upload_marker_path(digest);
+        let (chunk_path, digest_str) = self.chunk_path(digest);
+        let _lock = self.mutex.lock();
+
+        if let Err(err) = std::fs::rename(marker_path, chunk_path) {
+            // Check if the chunk has been inserted since. Otherwise it is not safe to continue,
+            // as the concurrent chunk upload has failed and the marker file has been cleaned up,
+            // which leaves a race window open for garbage collection to remove the chunk.
+            if self.cond_touch_chunk(digest, false)? {
+                return Ok(());
+            }
+
+            return Err(format_err!(
+                "persisting backend upload marker failed for {digest_str} - {err}"
+            ));
+        }
+        Ok(())
+    }
+
     pub(crate) fn cleanup_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
         if self.datastore_backend_type == DatastoreBackendType::Filesystem {
             bail!("cannot cleanup backend upload marker, not a cache store");
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index e40b6883b..1f6eb9a7a 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -1884,6 +1884,13 @@ impl DataStore {
         self.inner.chunk_store.insert_backend_upload_marker(digest)
     }
 
+    /// Persist the backend upload marker to be a zero size chunk marker.
+    ///
+    /// Marks the chunk as present in the local store cache without inserting its payload.
+    pub fn persist_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
+        self.inner.chunk_store.persist_backend_upload_marker(digest)
+    }
+
     /// Remove the marker file signaling an in-progress upload to the backend
     pub fn cleanup_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
         self.inner.chunk_store.cleanup_backend_upload_marker(digest)
diff --git a/src/api2/backup/upload_chunk.rs b/src/api2/backup/upload_chunk.rs
index 7d1f863ed..2f09938b7 100644
--- a/src/api2/backup/upload_chunk.rs
+++ b/src/api2/backup/upload_chunk.rs
@@ -263,10 +263,20 @@ async fn upload_to_backend(
 
     if env.no_cache {
         let object_key = pbs_datastore::s3::object_key_from_digest(&digest)?;
-        let is_duplicate = s3_client
+        if !datastore.insert_backend_upload_marker(&digest)? {
+            return Ok((digest, size, encoded_size, true));
+        }
+        let is_duplicate = match s3_client
             .upload_no_replace_with_retry(object_key, data)
             .await
-            .map_err(|err| format_err!("failed to upload chunk to s3 backend - {err:#}"))?;
+        {
+            Ok(is_duplicate) => is_duplicate,
+            Err(err) => {
+                datastore.cleanup_backend_upload_marker(&digest)?;
+                bail!("failed to upload chunk to s3 backend - {err:#}");
+            }
+        };
+        env.datastore.persist_backend_upload_marker(&digest)?;
         return Ok((digest, size, encoded_size, is_duplicate));
     }
 
-- 
2.47.3


_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel