From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 7CA606914D for ; Tue, 4 Aug 2020 12:42:58 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CBAFBBE0D for ; Tue, 4 Aug 2020 12:42:27 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id F3E77BD84 for ; Tue, 4 Aug 2020 12:42:23 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id BD07243477 for ; Tue, 4 Aug 2020 12:42:23 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Tue, 4 Aug 2020 12:42:01 +0200 Message-Id: <20200804104205.29540-4-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200804104205.29540-1-s.reiter@proxmox.com> References: <20200804104205.29540-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.058 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[datastore.rs] Subject: [pbs-devel] [PATCH proxmox-backup 3/7] datastore: prevent in-use deletion with locks instead of heuristic X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 04 Aug 2020 10:42:58 -0000 Attempt to lock the backup directory to be deleted; if that succeeds, keep the lock until the deletion is complete. This way we ensure that no other locking operation (e.g. using a snapshot as base for another backup) can happen concurrently. Signed-off-by: Stefan Reiter --- For this to actually work the following patches are obviously necessary, but I wanted to keep them separate for review. src/backup/backup_info.rs | 55 +++++++++++++++++++++++++++++++++++---- src/backup/datastore.rs | 48 ++++++++-------------------------- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/src/backup/backup_info.rs b/src/backup/backup_info.rs index ea917d3c..c35928ce 100644 --- a/src/backup/backup_info.rs +++ b/src/backup/backup_info.rs @@ -41,7 +41,7 @@ lazy_static!{ } /// Opaque type releasing the corresponding flock when dropped -pub type BackupGroupGuard = Dir; +pub type BackupLockGuard = Dir; /// BackupGroup is a directory containing a list of BackupDir #[derive(Debug, Eq, PartialEq, Hash, Clone)] @@ -91,7 +91,11 @@ impl BackupGroup { let backup_dir = BackupDir::new(self.backup_type.clone(), self.backup_id.clone(), dt.timestamp()); let files = list_backup_files(l2_fd, backup_time)?; - list.push(BackupInfo { backup_dir, files }); + list.push(BackupInfo { + backup_dir, + files, + base_path: base_path.to_owned() + }); Ok(()) })?; @@ -137,7 +141,7 @@ impl BackupGroup { Ok(last) } - pub fn lock(&self, base_path: &Path) -> Result { + pub fn lock(&self, base_path: &Path) -> Result { use nix::fcntl::OFlag; use nix::sys::stat::Mode; @@ -299,6 +303,8 @@ pub struct BackupInfo { pub 
backup_dir: BackupDir, /// List of data files pub files: Vec, + /// Full path to dir containing backup_dir + pub base_path: PathBuf, } impl BackupInfo { @@ -309,7 +315,7 @@ impl BackupInfo { let files = list_backup_files(libc::AT_FDCWD, &path)?; - Ok(BackupInfo { backup_dir, files }) + Ok(BackupInfo { backup_dir, files, base_path: base_path.to_owned() }) } /// Finds the latest backup inside a backup group @@ -354,7 +360,11 @@ impl BackupInfo { let files = list_backup_files(l2_fd, backup_time)?; - list.push(BackupInfo { backup_dir, files }); + list.push(BackupInfo { + backup_dir, + files, + base_path: base_path.to_owned() + }); Ok(()) }) @@ -367,6 +377,41 @@ impl BackupInfo { // backup is considered unfinished if there is no manifest self.files.iter().any(|name| name == super::MANIFEST_BLOB_NAME) } + + pub fn lock(&self) -> Result { + use nix::fcntl::OFlag; + use nix::sys::stat::Mode; + + let mut path = self.base_path.clone(); + let dir = self.backup_dir.relative_path(); + path.push(&dir); + + let mut handle = Dir::open(&path, OFlag::O_RDONLY, Mode::empty()) + .map_err(|err| { + format_err!( + "unable to open snapshot directory {:?} for locking - {}", + &dir, + err, + ) + })?; + + // acquire in non-blocking mode, no point in waiting here since other + // backups could still take a very long time + proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_nanos(0))) + .map_err(|err| { + format_err!( + "unable to acquire lock on snapshot {:?} - {}", + &dir, + if err.would_block() { + String::from("snapshot is running or being used as base") + } else { + err.to_string() + } + ) + })?; + + Ok(handle) + } } fn list_backup_files(dirfd: RawFd, path: &P) -> Result, Error> { diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index ffd64b81..3c374302 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -11,7 +11,7 @@ use serde_json::Value; use proxmox::tools::fs::{replace_file, CreateOptions}; -use super::backup_info::{BackupGroup, 
BackupGroupGuard, BackupDir, BackupInfo}; +use super::backup_info::{BackupGroup, BackupLockGuard, BackupDir, BackupInfo}; use super::chunk_store::ChunkStore; use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; use super::fixed_index::{FixedIndexReader, FixedIndexWriter}; @@ -199,19 +199,13 @@ impl DataStore { let full_path = self.group_path(backup_group); - let mut snap_list = backup_group.list_backups(&self.base_path())?; - BackupInfo::sort_list(&mut snap_list, false); - for snap in snap_list { - if snap.is_finished() { - break; - } else { - bail!( - "cannot remove backup group {:?}, contains potentially running backup: {}", - full_path, - snap.backup_dir - ); - } - } + let _guard = backup_group.lock(&self.base_path()).map_err(|err| { + format_err!( + "cannot acquire lock on backup group {}: {}", + backup_group, + err + ) + })?; log::info!("removing backup group {:?}", full_path); std::fs::remove_dir_all(&full_path) @@ -231,29 +225,9 @@ impl DataStore { let full_path = self.snapshot_path(backup_dir); + let _guard; if !force { - let mut snap_list = backup_dir.group().list_backups(&self.base_path())?; - BackupInfo::sort_list(&mut snap_list, false); - let mut prev_snap_finished = true; - for snap in snap_list { - let cur_snap_finished = snap.is_finished(); - if &snap.backup_dir == backup_dir { - if !cur_snap_finished { - bail!( - "cannot remove currently running snapshot: {:?}", - backup_dir - ); - } - if !prev_snap_finished { - bail!( - "cannot remove snapshot {:?}, successor is currently running and potentially based on it", - backup_dir - ); - } - break; - } - prev_snap_finished = cur_snap_finished; - } + _guard = BackupInfo::new(&self.base_path(), backup_dir.clone())?.lock()?; } log::info!("removing backup snapshot {:?}", full_path); @@ -326,7 +300,7 @@ impl DataStore { /// current owner (instead of setting the owner). /// /// This also aquires an exclusive lock on the directory and returns the lock guard. 
- pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupGroupGuard), Error> { + pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupLockGuard), Error> { // create intermediate path first: let base_path = self.base_path(); -- 2.20.1