From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 9A82D1FF16B for ; Fri, 26 Sep 2025 10:42:40 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 7ABAAAFC5; Fri, 26 Sep 2025 10:43:13 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Fri, 26 Sep 2025 10:42:21 +0200 Message-ID: <20250926084221.201116-3-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20250926084221.201116-1-c.ebner@proxmox.com> References: <20250926084221.201116-1-c.ebner@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1758876145238 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.041 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_SHORT 0.001 Use of a URL Shortener for very short URL RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[environment.rs, docs.rs] Subject: [pbs-devel] [PATCH proxmox-backup v2 2/2] api: backup: never hold mutex guard when doing manifest update X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" A manifest update with the s3 backend will call async code, which must be avoided while holding the mutex guard because of possible deadlocks [0]. Therefore, perform all changes on the shared backup state and drop the guard before updating the manifest, which performs the backend-specific update, reacquiring it afterwards to ensure the fs sync level. To still guarantee consistency, replace the finished flag with an enum that adds a new transient finishing state, which allows distinguishing between the 3 different backup states. 
[0] https://docs.rs/tokio/latest/tokio/sync/struct.Mutex.html#which-kind-of-mutex-should-you-use Signed-off-by: Christian Ebner --- src/api2/backup/environment.rs | 48 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index f997c86a1..de6ce3c89 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -80,8 +80,15 @@ struct FixedWriterState { // key=digest, value=length type KnownChunksMap = HashMap<[u8; 32], u32>; +#[derive(PartialEq)] +enum BackupState { + Active, + Finishing, + Finished, +} + struct SharedBackupState { - finished: bool, + finished: BackupState, uid_counter: usize, file_counter: usize, // successfully uploaded files dynamic_writers: HashMap, @@ -92,12 +99,13 @@ struct SharedBackupState { } impl SharedBackupState { - // Raise error if finished flag is set + // Raise error if the backup is no longer in an active state. fn ensure_unfinished(&self) -> Result<(), Error> { - if self.finished { - bail!("backup already marked as finished."); + match self.finished { + BackupState::Active => Ok(()), + BackupState::Finishing => bail!("backup is already in the process of finishing."), + BackupState::Finished => bail!("backup already marked as finished."), } - Ok(()) } // Get an unique integer ID @@ -134,7 +142,7 @@ impl BackupEnvironment { no_cache: bool, ) -> Result { let state = SharedBackupState { - finished: false, + finished: BackupState::Active, uid_counter: 0, file_counter: 0, dynamic_writers: HashMap::new(), @@ -712,18 +720,29 @@ impl BackupEnvironment { } } - // check for valid manifest and store stats let stats = serde_json::to_value(state.backup_stat)?; + + // make sure no other api calls can modify the backup state anymore + state.finished = BackupState::Finishing; + + // never hold mutex guard during s3 upload due to possible deadlocks + drop(state); + + // check for valid manifest and store stats 
self.backup_dir .update_manifest(&self.backend, |manifest| { manifest.unprotected["chunk_upload_stats"] = stats; }) .map_err(|err| format_err!("unable to update manifest blob - {err}"))?; + let mut state = self.state.lock().unwrap(); + if state.finished != BackupState::Finishing { + bail!("backup not in finishing state after manifest update"); + } self.datastore.try_ensure_sync_level()?; // marks the backup as successful - state.finished = true; + state.finished = BackupState::Finished; Ok(()) } @@ -800,25 +819,24 @@ impl BackupEnvironment { self.formatter.format_result(result, self) } - /// Raise error if finished flag is not set + /// Raise error if finished state is not set pub fn ensure_finished(&self) -> Result<(), Error> { - let state = self.state.lock().unwrap(); - if !state.finished { - bail!("backup ended but finished flag is not set."); + if !self.finished() { + bail!("backup ended but finished state is not set."); } Ok(()) } - /// Return true if the finished flag is set + /// Return true if the finished state is set pub fn finished(&self) -> bool { let state = self.state.lock().unwrap(); - state.finished + state.finished == BackupState::Finished } /// Remove complete backup pub fn remove_backup(&self) -> Result<(), Error> { let mut state = self.state.lock().unwrap(); - state.finished = true; + state.finished = BackupState::Finished; self.datastore.remove_backup_dir( self.backup_dir.backup_ns(), -- 2.47.3 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel