From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 2487F6039B for ; Wed, 14 Oct 2020 14:17:22 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 7837B9C3B for ; Wed, 14 Oct 2020 14:16:51 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 25C219AD6 for ; Wed, 14 Oct 2020 14:16:48 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id DA78F45D47 for ; Wed, 14 Oct 2020 14:16:47 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Wed, 14 Oct 2020 14:16:36 +0200 Message-Id: <20201014121639.25276-9-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201014121639.25276-1-s.reiter@proxmox.com> References: <20201014121639.25276-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.038 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[environment.rs, verify.rs, datastore.rs] Subject: [pbs-devel] [PATCH proxmox-backup 08/11] datastore: add manifest locking X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 14 Oct 2020 12:17:22 -0000 Avoid races when updating manifest data by flocking the manifest file itself. store_manifest is made to require such a lock and will automatically drop it to ensure safety using Rust's compiler. Snapshot deletion also acquires the lock, so it cannot interfere with an outstanding manifest write. Signed-off-by: Stefan Reiter --- src/api2/admin/datastore.rs | 4 +-- src/api2/backup/environment.rs | 4 +-- src/backup/datastore.rs | 50 ++++++++++++++++++++++++++++++++-- src/backup/verify.rs | 6 ++-- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index 5824611b..11223e6a 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -1481,11 +1481,11 @@ fn set_notes( let allowed = (user_privs & PRIV_DATASTORE_READ) != 0; if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; } - let (mut manifest, _) = datastore.load_manifest(&backup_dir)?; + let (mut manifest, manifest_guard) = datastore.load_manifest_locked(&backup_dir)?; manifest.unprotected["notes"] = notes.into(); - datastore.store_manifest(&backup_dir, manifest)?; + datastore.store_manifest(&backup_dir, manifest, manifest_guard)?; Ok(()) } diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index f00c2cd3..0e672d8e 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -473,14 +473,14 @@ impl BackupEnvironment { } // check manifest - let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir) + let (mut manifest, manifest_guard) = 
self.datastore.load_manifest_locked(&self.backup_dir) .map_err(|err| format_err!("unable to load manifest blob - {}", err))?; let stats = serde_json::to_value(state.backup_stat)?; manifest.unprotected["chunk_upload_stats"] = stats; - self.datastore.store_manifest(&self.backup_dir, manifest) + self.datastore.store_manifest(&self.backup_dir, manifest, manifest_guard) .map_err(|err| format_err!("unable to store manifest blob - {}", err))?; if let Some(base) = &self.last_backup { diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index 8ea9311a..f8c228fc 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -3,6 +3,8 @@ use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::convert::TryFrom; +use std::time::Duration; +use std::fs::File; use anyhow::{bail, format_err, Error}; use lazy_static::lazy_static; @@ -24,6 +26,8 @@ use crate::tools::fs::{lock_dir_noblock, DirLockGuard}; use crate::api2::types::{GarbageCollectionStatus, Userid}; use crate::server::UPID; +pub type ManifestLock = File; + lazy_static! 
{ static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStore>>> = Mutex::new(HashMap::new()); } @@ -228,9 +232,10 @@ impl DataStore { let full_path = self.snapshot_path(backup_dir); - let _guard; + let (_guard, _manifest_guard); if !force { _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; + _manifest_guard = self.lock_manifest(backup_dir); } // Acquire lock and keep it during remove operation, so there's no @@ -656,8 +661,47 @@ impl DataStore { digest_str, err, )) - } + } + fn lock_manifest( + &self, + backup_dir: &BackupDir, + ) -> Result<File, Error> { + let mut path = self.base_path(); + path.push(backup_dir.relative_path()); + path.push(MANIFEST_BLOB_NAME); + + let mut handle = File::open(&path) + .map_err(|err| { + format_err!("unable to open manifest {:?} for locking - {}", &path, err) + })?; + + proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_secs(5))) + .map_err(|err| { + format_err!( + "unable to acquire lock on manifest {:?} - {}", &path, err + ) + })?; + + Ok(handle) + } + + /// Load the manifest with a lock, so it can be safely written back again. + /// Most operations consist of "load -> edit unprotected -> write back" so the lock is not held + /// for long - thus we wait a few seconds for the lock to become available before giving up. In + /// case of verify it might take longer, so all callers must either be able to cope with a + /// failure or ensure that they are exclusive with verify. + pub fn load_manifest_locked( + &self, + backup_dir: &BackupDir, + ) -> Result<(BackupManifest, ManifestLock), Error> { + let guard = self.lock_manifest(backup_dir)?; + let blob = self.load_blob(backup_dir, MANIFEST_BLOB_NAME)?; + let manifest = BackupManifest::try_from(blob)?; + Ok((manifest, guard)) + } + + /// Load the manifest without a lock. Cannot be edited and written back. pub fn load_manifest( &self, backup_dir: &BackupDir, @@ -668,10 +712,12 @@ impl DataStore { Ok((manifest, raw_size)) } + /// Store a given manifest. 
Requires a lock acquired with load_manifest_locked for safety. pub fn store_manifest( &self, backup_dir: &BackupDir, manifest: BackupManifest, + _manifest_lock: ManifestLock, ) -> Result<(), Error> { let manifest = serde_json::to_value(manifest)?; let manifest = serde_json::to_string_pretty(&manifest)?; diff --git a/src/backup/verify.rs b/src/backup/verify.rs index 05b6ba86..839987e1 100644 --- a/src/backup/verify.rs +++ b/src/backup/verify.rs @@ -300,8 +300,8 @@ pub fn verify_backup_dir( return Ok(true); } - let mut manifest = match datastore.load_manifest(&backup_dir) { - Ok((manifest, _)) => manifest, + let (mut manifest, manifest_guard) = match datastore.load_manifest_locked(&backup_dir) { + Ok((manifest, guard)) => (manifest, guard), Err(err) => { task_log!( worker, @@ -368,7 +368,7 @@ pub fn verify_backup_dir( upid, }; manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?; - datastore.store_manifest(&backup_dir, manifest) + datastore.store_manifest(&backup_dir, manifest, manifest_guard) .map_err(|err| format_err!("unable to store manifest blob - {}", err))?; Ok(error_count == 0) -- 2.20.1