public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 3/7] datastore: prevent in-use deletion with locks instead of heuristic
Date: Tue,  4 Aug 2020 12:42:01 +0200	[thread overview]
Message-ID: <20200804104205.29540-4-s.reiter@proxmox.com> (raw)
In-Reply-To: <20200804104205.29540-1-s.reiter@proxmox.com>

Attempt to lock the backup directory to be deleted; if that succeeds, keep the
lock until the deletion is complete. This way we ensure that no other
locking operation (e.g. using a snapshot as base for another backup) can
happen concurrently.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---

For this to actually work, the following patches are obviously necessary, but I
wanted to keep them separate for review.

 src/backup/backup_info.rs | 55 +++++++++++++++++++++++++++++++++++----
 src/backup/datastore.rs   | 48 ++++++++--------------------------
 2 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/src/backup/backup_info.rs b/src/backup/backup_info.rs
index ea917d3c..c35928ce 100644
--- a/src/backup/backup_info.rs
+++ b/src/backup/backup_info.rs
@@ -41,7 +41,7 @@ lazy_static!{
 }
 
 /// Opaque type releasing the corresponding flock when dropped
-pub type BackupGroupGuard = Dir;
+pub type BackupLockGuard = Dir;
 
 /// BackupGroup is a directory containing a list of BackupDir
 #[derive(Debug, Eq, PartialEq, Hash, Clone)]
@@ -91,7 +91,11 @@ impl BackupGroup {
             let backup_dir = BackupDir::new(self.backup_type.clone(), self.backup_id.clone(), dt.timestamp());
             let files = list_backup_files(l2_fd, backup_time)?;
 
-            list.push(BackupInfo { backup_dir, files });
+            list.push(BackupInfo {
+                backup_dir,
+                files,
+                base_path: base_path.to_owned()
+            });
 
             Ok(())
         })?;
@@ -137,7 +141,7 @@ impl BackupGroup {
         Ok(last)
     }
 
-    pub fn lock(&self, base_path: &Path) -> Result<BackupGroupGuard, Error> {
+    pub fn lock(&self, base_path: &Path) -> Result<BackupLockGuard, Error> {
         use nix::fcntl::OFlag;
         use nix::sys::stat::Mode;
 
@@ -299,6 +303,8 @@ pub struct BackupInfo {
     pub backup_dir: BackupDir,
     /// List of data files
     pub files: Vec<String>,
+    /// Full path to dir containing backup_dir
+    pub base_path: PathBuf,
 }
 
 impl BackupInfo {
@@ -309,7 +315,7 @@ impl BackupInfo {
 
         let files = list_backup_files(libc::AT_FDCWD, &path)?;
 
-        Ok(BackupInfo { backup_dir, files })
+        Ok(BackupInfo { backup_dir, files, base_path: base_path.to_owned() })
     }
 
     /// Finds the latest backup inside a backup group
@@ -354,7 +360,11 @@ impl BackupInfo {
 
                     let files = list_backup_files(l2_fd, backup_time)?;
 
-                    list.push(BackupInfo { backup_dir, files });
+                    list.push(BackupInfo {
+                        backup_dir,
+                        files,
+                        base_path: base_path.to_owned()
+                    });
 
                     Ok(())
                 })
@@ -367,6 +377,41 @@ impl BackupInfo {
         // backup is considered unfinished if there is no manifest
         self.files.iter().any(|name| name == super::MANIFEST_BLOB_NAME)
     }
+
+    pub fn lock(&self) -> Result<BackupLockGuard, Error> {
+        use nix::fcntl::OFlag;
+        use nix::sys::stat::Mode;
+
+        let mut path = self.base_path.clone();
+        let dir = self.backup_dir.relative_path();
+        path.push(&dir);
+
+        let mut handle = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+            .map_err(|err| {
+                format_err!(
+                    "unable to open snapshot directory {:?} for locking - {}",
+                    &dir,
+                    err,
+                )
+            })?;
+
+        // acquire in non-blocking mode, no point in waiting here since other
+        // backups could still take a very long time
+        proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_nanos(0)))
+            .map_err(|err| {
+                format_err!(
+                    "unable to acquire lock on snapshot {:?} - {}",
+                    &dir,
+                    if err.would_block() {
+                        String::from("snapshot is running or being used as base")
+                    } else {
+                        err.to_string()
+                    }
+                )
+            })?;
+
+        Ok(handle)
+    }
 }
 
 fn list_backup_files<P: ?Sized + nix::NixPath>(dirfd: RawFd, path: &P) -> Result<Vec<String>, Error> {
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index ffd64b81..3c374302 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -11,7 +11,7 @@ use serde_json::Value;
 
 use proxmox::tools::fs::{replace_file, CreateOptions};
 
-use super::backup_info::{BackupGroup, BackupGroupGuard, BackupDir, BackupInfo};
+use super::backup_info::{BackupGroup, BackupLockGuard, BackupDir, BackupInfo};
 use super::chunk_store::ChunkStore;
 use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
@@ -199,19 +199,13 @@ impl DataStore {
 
         let full_path = self.group_path(backup_group);
 
-        let mut snap_list = backup_group.list_backups(&self.base_path())?;
-        BackupInfo::sort_list(&mut snap_list, false);
-        for snap in snap_list {
-            if snap.is_finished() {
-                break;
-            } else {
-                bail!(
-                    "cannot remove backup group {:?}, contains potentially running backup: {}",
-                    full_path,
-                    snap.backup_dir
-                );
-            }
-        }
+        let _guard = backup_group.lock(&self.base_path()).map_err(|err| {
+            format_err!(
+                "cannot acquire lock on backup group {}: {}",
+                backup_group,
+                err
+            )
+        })?;
 
         log::info!("removing backup group {:?}", full_path);
         std::fs::remove_dir_all(&full_path)
@@ -231,29 +225,9 @@ impl DataStore {
 
         let full_path = self.snapshot_path(backup_dir);
 
+        let _guard;
         if !force {
-            let mut snap_list = backup_dir.group().list_backups(&self.base_path())?;
-            BackupInfo::sort_list(&mut snap_list, false);
-            let mut prev_snap_finished = true;
-            for snap in snap_list {
-                let cur_snap_finished = snap.is_finished();
-                if &snap.backup_dir == backup_dir {
-                    if !cur_snap_finished {
-                        bail!(
-                            "cannot remove currently running snapshot: {:?}",
-                            backup_dir
-                        );
-                    }
-                    if !prev_snap_finished {
-                        bail!(
-                            "cannot remove snapshot {:?}, successor is currently running and potentially based on it",
-                            backup_dir
-                        );
-                    }
-                    break;
-                }
-                prev_snap_finished = cur_snap_finished;
-            }
+            _guard = BackupInfo::new(&self.base_path(), backup_dir.clone())?.lock()?;
         }
 
         log::info!("removing backup snapshot {:?}", full_path);
@@ -326,7 +300,7 @@ impl DataStore {
     /// current owner (instead of setting the owner).
     ///
     /// This also aquires an exclusive lock on the directory and returns the lock guard.
-    pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupGroupGuard), Error> {
+    pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupLockGuard), Error> {
 
         // create intermediate path first:
         let base_path = self.base_path();
-- 
2.20.1





  parent reply	other threads:[~2020-08-04 10:42 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-04 10:41 [pbs-devel] [PATCH 0/7] More flocking and race elimination Stefan Reiter
2020-08-04 10:41 ` [pbs-devel] [PATCH proxmox-backup 1/7] finish_backup: mark backup as finished only after checks have passed Stefan Reiter
2020-08-06  4:39   ` [pbs-devel] applied: " Dietmar Maurer
2020-08-04 10:42 ` [pbs-devel] [PATCH proxmox-backup 2/7] backup: only allow finished backups as base snapshot Stefan Reiter
2020-08-06  4:45   ` Dietmar Maurer
2020-08-06  7:58     ` Stefan Reiter
2020-08-07  5:40       ` [pbs-devel] applied: " Dietmar Maurer
2020-08-04 10:42 ` Stefan Reiter [this message]
2020-08-06  4:51   ` [pbs-devel] [PATCH proxmox-backup 3/7] datastore: prevent in-use deletion with locks instead of heuristic Dietmar Maurer
2020-08-04 10:42 ` [pbs-devel] [PATCH proxmox-backup 4/7] prune: also check backup snapshot locks Stefan Reiter
2020-08-05  7:23   ` Fabian Ebner
2020-08-05  8:34     ` Stefan Reiter
2020-08-04 10:42 ` [pbs-devel] [PATCH proxmox-backup 5/7] backup: flock snapshot on backup start Stefan Reiter
2020-08-04 10:42 ` [pbs-devel] [PATCH proxmox-backup 6/7] Revert "backup: ensure base snapshots are still available after backup" Stefan Reiter
2020-08-04 10:42 ` [pbs-devel] [PATCH proxmox-backup 7/7] backup: lock base snapshot and ensure existance on finish Stefan Reiter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200804104205.29540-4-s.reiter@proxmox.com \
    --to=s.reiter@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal