From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 9795760174 for ; Fri, 16 Oct 2020 08:34:14 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 83F8C17F05 for ; Fri, 16 Oct 2020 08:34:14 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 65D0717EF7 for ; Fri, 16 Oct 2020 08:34:13 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 292C945D2D for ; Fri, 16 Oct 2020 08:34:13 +0200 (CEST) Date: Fri, 16 Oct 2020 08:33:54 +0200 (CEST) From: Dietmar Maurer To: Proxmox Backup Server development discussion , Stefan Reiter Message-ID: <1528426253.279.1602830035538@webmail.proxmox.com> In-Reply-To: <20201015104916.21170-3-s.reiter@proxmox.com> References: <20201015104916.21170-1-s.reiter@proxmox.com> <20201015104916.21170-3-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Priority: 3 Importance: Normal X-Mailer: Open-Xchange Mailer v7.10.4-Rev11 X-Originating-Client: open-xchange-appsuite X-SPAM-LEVEL: Spam detection results: 0 AWL 0.064 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: 
sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [manifest.rs, verify.rs, proxmox.com, datastore.rs, environment.rs] Subject: Re: [pbs-devel] [PATCH v2 proxmox-backup 2/4] datastore: add manifest locking X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 16 Oct 2020 06:34:14 -0000 comments inline: > On 10/15/2020 12:49 PM Stefan Reiter wrote: > > > Avoid races when updating manifest data by flocking a lock file. > update_manifest is used to ensure updates always happen with the lock > held. > > Snapshot deletion also acquires the lock, so it cannot interfere with an > outstanding manifest write. > > Signed-off-by: Stefan Reiter > --- > > v2: > * Use separate manifest lock file > * Change store_manifest to update_manifest to force consumers to use correct > locking > * Don't hold lock across verify, reload manifest at the end > * Update comments > > src/api2/admin/datastore.rs | 8 +++---- > src/api2/backup/environment.rs | 13 ++++------ > src/backup/datastore.rs | 43 +++++++++++++++++++++++++++++----- > src/backup/manifest.rs | 1 + > src/backup/verify.rs | 9 +++---- > 5 files changed, 50 insertions(+), 24 deletions(-) > > diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs > index de39cd1b..4f15c1cd 100644 > --- a/src/api2/admin/datastore.rs > +++ b/src/api2/admin/datastore.rs > @@ -1481,11 +1481,9 @@ fn set_notes( > let allowed = (user_privs & PRIV_DATASTORE_READ) != 0; > if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; } > > - let (mut manifest, _) = datastore.load_manifest(&backup_dir)?; > - > - manifest.unprotected["notes"] = notes.into(); > - > - datastore.store_manifest(&backup_dir, manifest)?; 
> + datastore.update_manifest(&backup_dir,|manifest| { > + manifest.unprotected["notes"] = notes.into(); > + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?; > > Ok(()) > } > diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs > index f00c2cd3..c4f166df 100644 > --- a/src/api2/backup/environment.rs > +++ b/src/api2/backup/environment.rs > @@ -472,16 +472,11 @@ impl BackupEnvironment { > bail!("backup does not contain valid files (file count == 0)"); > } > > - // check manifest > - let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir) > - .map_err(|err| format_err!("unable to load manifest blob - {}", err))?; > - > + // check for valid manifest and store stats > let stats = serde_json::to_value(state.backup_stat)?; > - > - manifest.unprotected["chunk_upload_stats"] = stats; > - > - self.datastore.store_manifest(&self.backup_dir, manifest) > - .map_err(|err| format_err!("unable to store manifest blob - {}", err))?; > + self.datastore.update_manifest(&self.backup_dir, |manifest| { > + manifest.unprotected["chunk_upload_stats"] = stats; > + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?; > > if let Some(base) = &self.last_backup { > let path = self.datastore.snapshot_path(&base.backup_dir); > diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs > index ca8ca438..7b2bee7e 100644 > --- a/src/backup/datastore.rs > +++ b/src/backup/datastore.rs > @@ -3,17 +3,19 @@ use std::io::{self, Write}; > use std::path::{Path, PathBuf}; > use std::sync::{Arc, Mutex}; > use std::convert::TryFrom; > +use std::time::Duration; > +use std::fs::File; > > use anyhow::{bail, format_err, Error}; > use lazy_static::lazy_static; > > -use proxmox::tools::fs::{replace_file, CreateOptions}; > +use proxmox::tools::fs::{replace_file, CreateOptions, open_file_locked}; > > use super::backup_info::{BackupGroup, BackupDir}; > use super::chunk_store::ChunkStore; > use 
super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; > use super::fixed_index::{FixedIndexReader, FixedIndexWriter}; > -use super::manifest::{MANIFEST_BLOB_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest}; > +use super::manifest::{MANIFEST_BLOB_NAME, MANIFEST_LOCK_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest}; > use super::index::*; > use super::{DataBlob, ArchiveType, archive_type}; > use crate::config::datastore; > @@ -235,9 +237,10 @@ impl DataStore { > > let full_path = self.snapshot_path(backup_dir); > > - let _guard; > + let (_guard, _manifest_guard); > if !force { > _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; > + _manifest_guard = self.lock_manifest(backup_dir); I think this is unnecessary. A manifest update should not block remove_backup_dir. What exactly is this lock needed for here? > } > > // Acquire lock and keep it during remove operation, so there's no > @@ -665,8 +668,27 @@ impl DataStore { > digest_str, > err, > )) > - } > + } > > + fn lock_manifest( > + &self, > + backup_dir: &BackupDir, > + ) -> Result { > + let mut path = self.base_path(); > + path.push(backup_dir.relative_path()); > + path.push(&MANIFEST_LOCK_NAME); > + > + // update_manifest should never take a long time, so if someone else has > + // the lock we can simply block a bit and should get it soon > + open_file_locked(&path, Duration::from_secs(5), true) > + .map_err(|err| { > + format_err!( > + "unable to acquire manifest lock {:?} - {}", &path, err > + ) > + }) > + } > + > + /// Load the manifest without a lock. Must not be written back. > pub fn load_manifest( > &self, > backup_dir: &BackupDir, > @@ -677,11 +699,19 @@ impl DataStore { > Ok((manifest, raw_size)) > } > > - pub fn store_manifest( > + /// Update the manifest of the specified snapshot. Never write a manifest directly, > + /// only use this method - anything else may break locking guarantees. 
> + pub fn update_manifest( > &self, > backup_dir: &BackupDir, > - manifest: BackupManifest, > + update_fn: impl FnOnce(&mut BackupManifest), > ) -> Result<(), Error> { It should not be possible to update anything outside the "unprotected" property. > + > + let _guard = self.lock_manifest(backup_dir)?; > + let (mut manifest, _) = self.load_manifest(&backup_dir)?; > + > + update_fn(&mut manifest); > + > let manifest = serde_json::to_value(manifest)?; > let manifest = serde_json::to_string_pretty(&manifest)?; > let blob = DataBlob::encode(manifest.as_bytes(), None, true)?; > @@ -691,6 +721,7 @@ impl DataStore { > path.push(backup_dir.relative_path()); > path.push(MANIFEST_BLOB_NAME); > > + // atomic replace invalidates flock - no other writes past this point! > replace_file(&path, raw_data, CreateOptions::new())?; > > Ok(()) > diff --git a/src/backup/manifest.rs b/src/backup/manifest.rs > index 609cc998..51980a07 100644 > --- a/src/backup/manifest.rs > +++ b/src/backup/manifest.rs > @@ -8,6 +8,7 @@ use ::serde::{Deserialize, Serialize}; > use crate::backup::{BackupDir, CryptMode, CryptConfig}; > > pub const MANIFEST_BLOB_NAME: &str = "index.json.blob"; > +pub const MANIFEST_LOCK_NAME: &str = ".index.json.lck"; > pub const CLIENT_LOG_BLOB_NAME: &str = "client.log.blob"; > > mod hex_csum { > diff --git a/src/backup/verify.rs b/src/backup/verify.rs > index 05b6ba86..ea3fa760 100644 > --- a/src/backup/verify.rs > +++ b/src/backup/verify.rs > @@ -300,7 +300,7 @@ pub fn verify_backup_dir( > return Ok(true); > } > > - let mut manifest = match datastore.load_manifest(&backup_dir) { > + let manifest = match datastore.load_manifest(&backup_dir) { > Ok((manifest, _)) => manifest, > Err(err) => { > task_log!( > @@ -367,9 +367,10 @@ pub fn verify_backup_dir( > state: verify_result, > upid, > }; > - manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?; > - datastore.store_manifest(&backup_dir, manifest) > - .map_err(|err| format_err!("unable to store manifest 
blob - {}", err))?; > + let verify_state = serde_json::to_value(verify_state)?; > + datastore.update_manifest(&backup_dir, |manifest| { > + manifest.unprotected["verify_state"] = verify_state; > + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?; > > Ok(error_count == 0) > } > -- > 2.20.1 > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel