Date: Mon, 03 Nov 2025 15:51:05 +0100
From: Fabian Grünbichler
To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>
Subject: Re: [pbs-devel] [PATCH proxmox-backup 10/17] datastore: implement per-chunk file locking helper for s3 backend
Message-Id: <1762177869.199x08a4up.astroid@yuna.none>
In-Reply-To: <20251103113120.239455-11-c.ebner@proxmox.com>
References: <20251103113120.239455-1-c.ebner@proxmox.com>
 <20251103113120.239455-11-c.ebner@proxmox.com>

On November 3, 2025 12:31 pm, Christian Ebner wrote:
> Adds a datastore helper method to create per-chunk file locks. These
> will be used to guard chunk operations on s3 backends to guarantee
> exclusive access when performing cache and backend operations.
>
> Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
> ---
>  pbs-datastore/src/backup_info.rs |  2 +-
>  pbs-datastore/src/chunk_store.rs | 26 ++++++++++++++++++++++++++
>  pbs-datastore/src/datastore.rs   | 12 ++++++++++++
>  3 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs
> index 4b10b6435..70c0fbe8a 100644
> --- a/pbs-datastore/src/backup_info.rs
> +++ b/pbs-datastore/src/backup_info.rs
> @@ -936,7 +936,7 @@ fn lock_file_path_helper(ns: &BackupNamespace, path: PathBuf) -> PathBuf {
>  /// deletion.
>  ///
>  /// It also creates the base directory for lock files.
> -fn lock_helper<F>(
> +pub(crate) fn lock_helper<F>(
>      store_name: &str,
>      path: &std::path::Path,
>      lock_fn: F,
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index ba7618e40..49687b2fa 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -8,6 +8,7 @@ use anyhow::{bail, format_err, Context, Error};
>  use tracing::{info, warn};
>
>  use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus};
> +use pbs_config::BackupLockGuard;
>  use proxmox_io::ReadExt;
>  use proxmox_s3_client::S3Client;
>  use proxmox_sys::fs::{create_dir, create_path, file_type_from_file_stat, CreateOptions};
> @@ -16,6 +17,7 @@ use proxmox_sys::process_locker::{
>  };
>  use proxmox_worker_task::WorkerTaskContext;
>
> +use crate::backup_info::DATASTORE_LOCKS_DIR;
>  use crate::data_blob::DataChunkBuilder;
>  use crate::file_formats::{
>      COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
> @@ -759,6 +761,30 @@ impl ChunkStore {
>          ChunkStore::check_permissions(lockfile_path, 0o644)?;
>          Ok(())
>      }
> +
> +    /// Generates the path to the chunks lock file
> +    pub(crate) fn chunk_lock_path(&self, digest: &[u8]) -> PathBuf {
> +        let mut lock_path = Path::new(DATASTORE_LOCKS_DIR).join(self.name.clone());
> +        let digest_str = hex::encode(digest);
> +        lock_path.push(".chunks");
> +        let prefix = digest_to_prefix(digest);
> +        lock_path.push(&prefix);
> +        lock_path.push(&digest_str);
> +        lock_path

should we add "s3" or some suffix here, so that if we add another
backend in the future we already have specific paths? (see the sketch
further down)

> +    }
> +
> +    /// Get an exclusive lock on the chunks lock file
> +    pub(crate) fn lock_chunk(
> +        &self,
> +        digest: &[u8],
> +        timeout: Duration,
> +    ) -> Result<BackupLockGuard, Error> {
> +        let lock_path = self.chunk_lock_path(digest);
> +        let guard = crate::backup_info::lock_helper(self.name(), &lock_path, |path| {
> +            pbs_config::open_backup_lockfile(path, Some(timeout), true)
> +        })?;
> +        Ok(guard)
> +    }
>  }
>
>  #[test]
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 397c37e56..32f3562b3 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -2568,6 +2568,18 @@ impl DataStore {
>          Ok(())
>      }
>
> +    /// Locks the per chunk lock file if the backend requires it
> +    fn lock_chunk_for_backend(&self, digest: &[u8; 32]) -> Result<Option<BackupLockGuard>, Error> {
> +        // s3 put request times out after upload_size / 1 Kib/s, so about 2.3 hours for 8 MiB
> +        let timeout = Duration::from_secs(3 * 60 * 60);

could move into the S3 branch below, or be made S3-specific in the
first place, since it is only called/effective there? the renaming
helper needs some rework then I guess..

but I am not sure if this logic here is really sound (any individual
caller waiting for longer than a single upload's max timeout might be
valid, since the locking is not fair and multiple locking attempts
might have queued up), I guess the instances where we end up taking
this lock are few enough that making no progress at all over such a
long time is unlikely..

we currently take this lock for the duration of a chunk
upload/insertion, for the duration of a chunk rename after corruption
has been detected, and for a batch of GC chunk removal.
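coming back to the chunk_lock_path question above: concretely, the idea
would just be to make the final path component backend-specific, e.g.
(rough sketch only, exact suffix and scheme up for debate)

        // would yield .../.chunks/<prefix>/<digest>.s3 instead of .../<digest>
        lock_path.push(format!("{digest_str}.s3"));
        lock_path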
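as for making the timeout S3-specific: a rough, untested sketch based
on the hunk quoted below (signature reconstructed, naming may differ)
could look like

    /// Locks the per chunk lock file if the backend requires it
    fn lock_chunk_for_backend(&self, digest: &[u8; 32]) -> Result<Option<BackupLockGuard>, Error> {
        match self.inner.backend_config.ty.unwrap_or_default() {
            DatastoreBackendType::Filesystem => Ok(None),
            DatastoreBackendType::S3 => {
                // s3 put request times out after upload_size / 1 KiB/s,
                // so about 2.3 hours for 8 MiB
                let timeout = Duration::from_secs(3 * 60 * 60);
                self.inner.chunk_store.lock_chunk(digest, timeout).map(Some)
            }
        }
    }

that way the timeout only exists in the code path where it is actually
effective.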
> +        match self.inner.backend_config.ty.unwrap_or_default() {
> +            DatastoreBackendType::Filesystem => Ok(None),
> +            DatastoreBackendType::S3 => {
> +                self.inner.chunk_store.lock_chunk(digest, timeout).map(Some)
> +            }
> +        }
> +    }
> +
>      /// Renames a corrupt chunk, returning the new path if the chunk was renamed successfully.
>      /// Returns with `Ok(None)` if the chunk source was not found.
>      pub fn rename_corrupt_chunk(&self, digest: &[u8; 32]) -> Result<Option<PathBuf>, Error> {
> --
> 2.47.3

_______________________________________________
pbs-devel mailing list
pbs-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel