all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 08/11] datastore: add manifest locking
Date: Wed, 14 Oct 2020 14:16:36 +0200	[thread overview]
Message-ID: <20201014121639.25276-9-s.reiter@proxmox.com> (raw)
In-Reply-To: <20201014121639.25276-1-s.reiter@proxmox.com>

Avoid races when updating manifest data by flocking the manifest file
itself. store_manifest now takes such a lock by value and drops it
automatically when it returns, so the Rust compiler makes sure callers
pass in a lock before a manifest can be written back.

Snapshot deletion also acquires the lock, so it cannot interfere with an
outstanding manifest write.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 src/api2/admin/datastore.rs    |  4 +--
 src/api2/backup/environment.rs |  4 +--
 src/backup/datastore.rs        | 50 ++++++++++++++++++++++++++++++++--
 src/backup/verify.rs           |  6 ++--
 4 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index 5824611b..11223e6a 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -1481,11 +1481,11 @@ fn set_notes(
     let allowed = (user_privs & PRIV_DATASTORE_READ) != 0;
     if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; }
 
-    let (mut manifest, _) = datastore.load_manifest(&backup_dir)?;
+    let (mut manifest, manifest_guard) = datastore.load_manifest_locked(&backup_dir)?;
 
     manifest.unprotected["notes"] = notes.into();
 
-    datastore.store_manifest(&backup_dir, manifest)?;
+    datastore.store_manifest(&backup_dir, manifest, manifest_guard)?;
 
     Ok(())
 }
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index f00c2cd3..0e672d8e 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -473,14 +473,14 @@ impl BackupEnvironment {
         }
 
         // check manifest
-        let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir)
+        let (mut manifest, manifest_guard) = self.datastore.load_manifest_locked(&self.backup_dir)
             .map_err(|err| format_err!("unable to load manifest blob - {}", err))?;
 
         let stats = serde_json::to_value(state.backup_stat)?;
 
         manifest.unprotected["chunk_upload_stats"] = stats;
 
-        self.datastore.store_manifest(&self.backup_dir, manifest)
+        self.datastore.store_manifest(&self.backup_dir, manifest, manifest_guard)
             .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
 
         if let Some(base) = &self.last_backup {
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index 8ea9311a..f8c228fc 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -3,6 +3,8 @@ use std::io::{self, Write};
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
 use std::convert::TryFrom;
+use std::time::Duration;
+use std::fs::File;
 
 use anyhow::{bail, format_err, Error};
 use lazy_static::lazy_static;
@@ -24,6 +26,8 @@ use crate::tools::fs::{lock_dir_noblock, DirLockGuard};
 use crate::api2::types::{GarbageCollectionStatus, Userid};
 use crate::server::UPID;
 
+pub type ManifestLock = File;
+
 lazy_static! {
     static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStore>>> = Mutex::new(HashMap::new());
 }
@@ -228,9 +232,10 @@ impl DataStore {
 
         let full_path = self.snapshot_path(backup_dir);
 
-        let _guard;
+        let (_guard, _manifest_guard);
         if !force {
             _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+            _manifest_guard = self.lock_manifest(backup_dir);
         }
 
         // Acquire lock and keep it during remove operation, so there's no
@@ -656,8 +661,47 @@ impl DataStore {
             digest_str,
             err,
         ))
-     }
+    }
 
+    fn lock_manifest(
+        &self,
+        backup_dir: &BackupDir,
+    ) -> Result<ManifestLock, Error> {
+        let mut path = self.base_path();
+        path.push(backup_dir.relative_path());
+        path.push(MANIFEST_BLOB_NAME);
+
+        let mut handle = File::open(&path)
+            .map_err(|err| {
+                format_err!("unable to open manifest {:?} for locking - {}", &path, err)
+            })?;
+
+        proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_secs(5)))
+            .map_err(|err| {
+                format_err!(
+                    "unable to acquire lock on manifest {:?} - {}", &path, err
+                )
+            })?;
+
+        Ok(handle)
+    }
+
+    /// Load the manifest with a lock, so it can be safely written back again.
+    /// Most operations consist of "load -> edit unprotected -> write back" so the lock is not held
+    /// for long - thus we wait a few seconds for the lock to become available before giving up. In
+    /// case of verify it might take longer, so all callers must either be able to cope with a
+    /// failure or ensure that they are exclusive with verify.
+    pub fn load_manifest_locked(
+        &self,
+        backup_dir: &BackupDir,
+    ) -> Result<(BackupManifest, ManifestLock), Error> {
+        let guard = self.lock_manifest(backup_dir)?;
+        let blob = self.load_blob(backup_dir, MANIFEST_BLOB_NAME)?;
+        let manifest = BackupManifest::try_from(blob)?;
+        Ok((manifest, guard))
+    }
+
+    /// Load the manifest without a lock. Cannot be edited and written back.
     pub fn load_manifest(
         &self,
         backup_dir: &BackupDir,
@@ -668,10 +712,12 @@ impl DataStore {
         Ok((manifest, raw_size))
     }
 
+    /// Store a given manifest. Requires a lock acquired with load_manifest_locked for safety.
     pub fn store_manifest(
         &self,
         backup_dir: &BackupDir,
         manifest: BackupManifest,
+        _manifest_lock: ManifestLock,
     ) -> Result<(), Error> {
         let manifest = serde_json::to_value(manifest)?;
         let manifest = serde_json::to_string_pretty(&manifest)?;
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 05b6ba86..839987e1 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -300,8 +300,8 @@ pub fn verify_backup_dir(
         return Ok(true);
     }
 
-    let mut manifest = match datastore.load_manifest(&backup_dir) {
-        Ok((manifest, _)) => manifest,
+    let (mut manifest, manifest_guard) = match datastore.load_manifest_locked(&backup_dir) {
+        Ok((manifest, guard)) => (manifest, guard),
         Err(err) => {
             task_log!(
                 worker,
@@ -368,7 +368,7 @@ pub fn verify_backup_dir(
         upid,
     };
     manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
-    datastore.store_manifest(&backup_dir, manifest)
+    datastore.store_manifest(&backup_dir, manifest, manifest_guard)
         .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
 
     Ok(error_count == 0)
-- 
2.20.1





  parent reply	other threads:[~2020-10-14 12:17 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-14 12:16 [pbs-devel] [PATCH 00/11] Locking and rustdoc improvements Stefan Reiter
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 01/11] prune: respect snapshot flock Stefan Reiter
2020-10-15  5:11   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 02/11] prune: never fail, just warn about failed removals Stefan Reiter
2020-10-15  5:12   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 03/11] backup: use shared flock for base snapshot Stefan Reiter
2020-10-15  5:12   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 04/11] reader: acquire shared flock on open snapshot Stefan Reiter
2020-10-15  5:13   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 05/11] verify: acquire shared snapshot flock and skip on error Stefan Reiter
2020-10-15  5:13   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 06/11] gc: avoid race between phase1 and forget/prune Stefan Reiter
2020-10-15  5:17   ` Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 07/11] datastore: remove load_manifest_json Stefan Reiter
2020-10-15  5:28   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` Stefan Reiter [this message]
2020-10-15  5:25   ` [pbs-devel] [PATCH proxmox-backup 08/11] datastore: add manifest locking Dietmar Maurer
2020-10-15  7:04     ` Fabian Grünbichler
2020-10-15  5:39   ` Dietmar Maurer
2020-10-15  7:53     ` Stefan Reiter
2020-10-15  5:43   ` Dietmar Maurer
2020-10-15  7:53     ` Stefan Reiter
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 09/11] datastore: remove individual snapshots before group Stefan Reiter
2020-10-15  5:51   ` [pbs-devel] applied: " Dietmar Maurer
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 10/11] rustdoc: add crate level doc Stefan Reiter
2020-10-14 12:16 ` [pbs-devel] [PATCH proxmox-backup 11/11] rustdoc: overhaul backup rustdoc and add locking table Stefan Reiter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201014121639.25276-9-s.reiter@proxmox.com \
    --to=s.reiter@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal