From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id DEF0F1FF16B for ; Fri, 26 Sep 2025 10:42:41 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 87850B115; Fri, 26 Sep 2025 10:43:14 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Fri, 26 Sep 2025 10:42:20 +0200 Message-ID: <20250926084221.201116-2-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20250926084221.201116-1-c.ebner@proxmox.com> References: <20250926084221.201116-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1758876145028 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.043 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_SHORT 0.001 Use of a URL Shortener for very short URL SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup v2 1/2] fix #6750: api: avoid possible deadlock on datastores with s3 backend X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Closing of the fixed or dynamic index files with s3 backend will call async code, which must be avoided because of possible deadlocks [0]. 
Therefore, perform all changes on the shared backup state and drop the guard before uploading the fixed index file to the s3 backend. In order to keep track of the index writer state, add a closed flag to signal that the writer has already been closed successfully. By only taking a mutable reference during the initial accounting and moving the writer out of the shared backup state's writers list after the upload, the pre-existing logic for the finished flag can be used. [0] https://docs.rs/tokio/latest/tokio/sync/struct.Mutex.html#which-kind-of-mutex-should-you-use Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=6750 Signed-off-by: Christian Ebner --- src/api2/backup/environment.rs | 121 ++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 34 deletions(-) diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index d5e6869cd..f997c86a1 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -62,6 +62,7 @@ struct DynamicWriterState { offset: u64, chunk_count: u64, upload_stat: UploadStatistic, + closed: bool, } struct FixedWriterState { @@ -73,6 +74,7 @@ struct FixedWriterState { small_chunk_count: usize, // allow 0..1 small chunks (last chunk may be smaller) upload_stat: UploadStatistic, incremental: bool, + closed: bool, } // key=digest, value=length @@ -194,6 +196,13 @@ impl BackupEnvironment { None => bail!("fixed writer '{}' not registered", wid), }; + if data.closed { + bail!( + "fixed writer '{}' register chunk failed - already closed", + data.name + ); + } + if size > data.chunk_size { bail!( "fixed writer '{}' - got large chunk ({} > {}", @@ -248,6 +257,13 @@ impl BackupEnvironment { None => bail!("dynamic writer '{}' not registered", wid), }; + if data.closed { + bail!( + "dynamic writer '{}' register chunk failed - already closed", + data.name + ); + } + // record statistics data.upload_stat.count += 1; data.upload_stat.size += size as u64; @@ -288,6 +304,7 @@ impl BackupEnvironment { 
offset: 0, chunk_count: 0, upload_stat: UploadStatistic::new(), + closed: false, }, ); @@ -320,6 +337,7 @@ impl BackupEnvironment { small_chunk_count: 0, upload_stat: UploadStatistic::new(), incremental, + closed: false, }, ); @@ -343,6 +361,13 @@ impl BackupEnvironment { None => bail!("dynamic writer '{}' not registered", wid), }; + if data.closed { + bail!( + "dynamic writer '{}' append chunk failed - already closed", + data.name + ); + } + if data.offset != offset { bail!( "dynamic writer '{}' append chunk failed - got strange chunk offset ({} != {})", @@ -377,6 +402,13 @@ impl BackupEnvironment { None => bail!("fixed writer '{}' not registered", wid), }; + if data.closed { + bail!( + "fixed writer '{}' append chunk failed - already closed", + data.name + ); + } + let end = (offset as usize) + (size as usize); let idx = data.index.check_chunk_alignment(end, size as usize)?; @@ -449,10 +481,17 @@ impl BackupEnvironment { state.ensure_unfinished()?; - let mut data = match state.dynamic_writers.remove(&wid) { + let data = match state.dynamic_writers.get_mut(&wid) { Some(data) => data, None => bail!("dynamic writer '{}' not registered", wid), }; + let writer_name = data.name.clone(); + let uuid = data.index.uuid; + let upload_stat = data.upload_stat; + + if data.closed { + bail!("dynamic writer '{writer_name}' close failed - already closed"); + } if data.chunk_count != chunk_count { bail!( @@ -472,9 +511,8 @@ impl BackupEnvironment { ); } - let uuid = data.index.uuid; - let expected_csum = data.index.close()?; + data.closed = true; if csum != expected_csum { bail!( @@ -483,28 +521,28 @@ impl BackupEnvironment { ); } + state.file_counter += 1; + state.backup_size += size; + state.backup_stat = state.backup_stat + upload_stat; + + self.log_upload_stat(&writer_name, &csum, &uuid, size, chunk_count, &upload_stat); + + // never hold mutex guard during s3 upload due to possible deadlocks + drop(state); + // For S3 backends, upload the index file to the object store after 
closing if let DatastoreBackend::S3(s3_client) = &self.backend { - self.s3_upload_index(s3_client, &data.name) + self.s3_upload_index(s3_client, &writer_name) .context("failed to upload dynamic index to s3 backend")?; self.log(format!( - "Uploaded dynamic index file to s3 backend: {}", - data.name + "Uploaded dynamic index file to s3 backend: {writer_name}" )) } - self.log_upload_stat( - &data.name, - &csum, - &uuid, - size, - chunk_count, - &data.upload_stat, - ); - - state.file_counter += 1; - state.backup_size += size; - state.backup_stat = state.backup_stat + data.upload_stat; + let mut state = self.state.lock().unwrap(); + if state.dynamic_writers.remove(&wid).is_none() { + bail!("dynamic writer '{wid}' no longer registered"); + } Ok(()) } @@ -521,11 +559,19 @@ impl BackupEnvironment { state.ensure_unfinished()?; - let mut data = match state.fixed_writers.remove(&wid) { + let data = match state.fixed_writers.get_mut(&wid) { Some(data) => data, None => bail!("fixed writer '{}' not registered", wid), }; + let writer_name = data.name.clone(); + let uuid = data.index.uuid; + let upload_stat = data.upload_stat; + + if data.closed { + bail!("fixed writer '{writer_name}' close failed - already closed"); + } + if data.chunk_count != chunk_count { bail!( "fixed writer '{}' close failed - received wrong number of chunk ({} != {})", @@ -557,8 +603,8 @@ impl BackupEnvironment { } } - let uuid = data.index.uuid; let expected_csum = data.index.close()?; + data.closed = true; if csum != expected_csum { bail!( @@ -567,28 +613,35 @@ impl BackupEnvironment { ); } - // For S3 backends, upload the index file to the object store after closing - if let DatastoreBackend::S3(s3_client) = &self.backend { - self.s3_upload_index(s3_client, &data.name) - .context("failed to upload fixed index to s3 backend")?; - self.log(format!( - "Uploaded fixed index file to object store: {}", - data.name - )) - } + state.file_counter += 1; + state.backup_size += size; + state.backup_stat = 
state.backup_stat + upload_stat; self.log_upload_stat( - &data.name, + &writer_name, &expected_csum, &uuid, size, chunk_count, - &data.upload_stat, + &upload_stat, ); - state.file_counter += 1; - state.backup_size += size; - state.backup_stat = state.backup_stat + data.upload_stat; + // never hold mutex guard during s3 upload due to possible deadlocks + drop(state); + + // For S3 backends, upload the index file to the object store after closing + if let DatastoreBackend::S3(s3_client) = &self.backend { + self.s3_upload_index(s3_client, &writer_name) + .context("failed to upload fixed index to s3 backend")?; + self.log(format!( + "Uploaded fixed index file to object store: {writer_name}" + )) + } + + let mut state = self.state.lock().unwrap(); + if state.fixed_writers.remove(&wid).is_none() { + bail!("fixed writer '{wid}' no longer registered"); + } Ok(()) } -- 2.47.3 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel