From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 91CEC1FF183 for ; Wed, 24 Sep 2025 16:56:33 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id AB95FB845; Wed, 24 Sep 2025 16:57:04 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Wed, 24 Sep 2025 16:56:11 +0200 Message-ID: <20250924145612.188579-2-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20250924145612.188579-1-c.ebner@proxmox.com> References: <20250924145612.188579-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1758725777324 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.042 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_SHORT 0.001 Use of a URL Shortener for very short URL SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup 1/2] fix #6750: api: avoid possible deadlock on datastores with s3 backend X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" Closing of the fixed or dynamic index files with s3 backend will call async code, which must be avoided because of possible deadlocks [0]. Therefore, perform all changes on the shared backup state and drop the guard before uploading the fixed index file to the s3 backend. Account for active backend operations and check consistency, since it must be assured that all active backend operations are finished before the finish call can succeed. [0] https://docs.rs/tokio/latest/tokio/sync/struct.Mutex.html#which-kind-of-mutex-should-you-use Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=6750 Signed-off-by: Christian Ebner --- src/api2/backup/environment.rs | 77 +++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index d5e6869cd..e535891a4 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -82,6 +82,7 @@ struct SharedBackupState { finished: bool, uid_counter: usize, file_counter: usize, // successfully uploaded files + active_backend_operations: usize, dynamic_writers: HashMap, fixed_writers: HashMap, known_chunks: KnownChunksMap, @@ -135,6 +136,7 @@ impl BackupEnvironment { finished: false, uid_counter: 0, file_counter: 0, + active_backend_operations: 0, dynamic_writers: HashMap::new(), fixed_writers: HashMap::new(), known_chunks: HashMap::new(), @@ -483,15 +485,10 @@ impl BackupEnvironment { ); } - // For S3 backends, upload the index file to the object store after closing - if let DatastoreBackend::S3(s3_client) = &self.backend { - self.s3_upload_index(s3_client, &data.name) - .context("failed to upload dynamic index to s3 backend")?; - self.log(format!( - "Uploaded dynamic index file to s3 backend: {}", - data.name - )) - } + state.file_counter += 1; + state.backup_size += size; + state.backup_stat = state.backup_stat + data.upload_stat; + state.active_backend_operations += 1; self.log_upload_stat( &data.name, @@ -502,9 +499,21 @@ impl BackupEnvironment { &data.upload_stat, ); - state.file_counter += 1; - state.backup_size += size; - state.backup_stat = state.backup_stat + data.upload_stat; + // never hold mutex guard during s3 upload due to possible deadlocks + drop(state); + + // For S3 backends, upload the index file to the object store after closing + if let DatastoreBackend::S3(s3_client) = &self.backend { + self.s3_upload_index(s3_client, &data.name) + .context("failed to upload dynamic index to s3 backend")?; + self.log(format!( + "Uploaded dynamic index file to s3 backend: {}", + data.name + )) + } + + let mut state = self.state.lock().unwrap(); + state.active_backend_operations -= 1; Ok(()) } @@ -567,15 +576,10 @@ impl BackupEnvironment { ); } - // For S3 backends, upload the index file to the object store after closing - if let DatastoreBackend::S3(s3_client) = &self.backend { - self.s3_upload_index(s3_client, &data.name) - .context("failed to upload fixed index to s3 backend")?; - self.log(format!( - "Uploaded fixed index file to object store: {}", - data.name - )) - } + state.file_counter += 1; + state.backup_size += size; + state.backup_stat = state.backup_stat + data.upload_stat; + state.active_backend_operations += 1; self.log_upload_stat( &data.name, @@ -586,9 +590,21 @@ impl BackupEnvironment { &data.upload_stat, ); - state.file_counter += 1; - state.backup_size += size; - state.backup_stat = state.backup_stat + data.upload_stat; + // never hold mutex guard during s3 upload due to possible deadlocks + drop(state); + + // For S3 backends, upload the index file to the object store after closing + if let DatastoreBackend::S3(s3_client) = &self.backend { + self.s3_upload_index(s3_client, &data.name) + .context("failed to upload fixed index to s3 backend")?; + self.log(format!( + "Uploaded fixed index file to object store: {}", + data.name + )) + } + + let mut state = self.state.lock().unwrap(); + state.active_backend_operations -= 1; Ok(()) } @@ -645,6 +661,13 @@ impl BackupEnvironment { bail!("found open index writer - unable to finish backup"); } + if state.active_backend_operations != 0 { + bail!( + "backup task still has {} active operations.", + state.active_backend_operations, + ); + } + if state.file_counter == 0 { bail!("backup does not contain valid files (file count == 0)"); } @@ -753,6 +776,12 @@ impl BackupEnvironment { if !state.finished { bail!("backup ended but finished flag is not set."); } + if state.active_backend_operations != 0 { + bail!( + "backup ended but {} active backend operations.", + state.active_backend_operations, + ); + } Ok(()) } -- 2.47.3 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel