From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC proxmox-backup 5/5] backup: validate chunk existence in background
Date: Wed, 30 Sep 2020 16:16:01 +0200
Message-ID: <20200930141601.27233-6-s.reiter@proxmox.com>
In-Reply-To: <20200930141601.27233-1-s.reiter@proxmox.com>

Reused chunks will not be uploaded, and are thus never touched on the
server. We need to verify their existence manually to ensure a valid
backup.

Since we know all chunks that the client may reuse must be recorded in
the previous snapshot (which is locked during backup and can't be
forgotten), we can do the validation in the background, while the backup
is still running, and only join at the end if there's still work left.

The tradeoff here is that we don't know yet which chunks the client will
*not* reuse later in the backup, so we have to check them all.

This also means we can revert the changes to the KnownChunksMap type
made in 43772efc6e.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 src/api2/backup/environment.rs | 155 ++++++++++++++++++++++-----------
 1 file changed, 105 insertions(+), 50 deletions(-)
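
Note: as a rough illustration of the scheme described above, here is a
minimal, self-contained sketch of the background validation pattern. It
uses a plain std thread and channel in place of the real ParallelHandler,
and hex()/chunk_path() below are stand-ins for digest_to_hex() and the
datastore API, not the actual functions:

use std::path::PathBuf;
use std::sync::mpsc;
use std::thread;

// stand-in for digest_to_hex()
fn hex(digest: &[u8; 32]) -> String {
    digest.iter().map(|b| format!("{:02x}", b)).collect()
}

// stand-in for datastore.chunk_path()
fn chunk_path(digest: &[u8; 32]) -> PathBuf {
    PathBuf::from(format!("/datastore/.chunks/{}", hex(digest)))
}

// Digests of reused (never re-uploaded) chunks are sent to a background
// thread while the backup is still running; the thread checks that each
// chunk file actually exists on disk.
fn spawn_validator() -> (mpsc::Sender<[u8; 32]>, thread::JoinHandle<Result<(), String>>) {
    let (tx, rx) = mpsc::channel();
    let handle = thread::spawn(move || {
        // the receive loop ends once all senders have been dropped
        for digest in rx {
            if !chunk_path(&digest).exists() {
                // the real code additionally marks the previous snapshot
                // as "verify failed" before bailing out
                return Err(format!("reused chunk {} is missing", hex(&digest)));
            }
        }
        Ok(())
    });
    (tx, handle)
}

fn main() {
    let (tx, handle) = spawn_validator();

    // during the backup: register each reused chunk for validation
    tx.send([0u8; 32]).expect("validator thread is gone");

    // at finish_backup(): close the channel, join, propagate errors
    drop(tx);
    match handle.join().expect("validator thread panicked") {
        Ok(()) => println!("all reused chunks exist"),
        Err(err) => eprintln!("validation failed: {}", err),
    }
}
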

diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index a8c9ddb4..08ecc290 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -11,8 +11,9 @@ use proxmox::api::{RpcEnvironment, RpcEnvironmentType};
 
 use crate::api2::types::{Userid, SnapshotVerifyState, VerifyState};
 use crate::backup::*;
-use crate::server::WorkerTask;
+use crate::server::{WorkerTask, UPID};
 use crate::server::formatter::*;
+use crate::tools::ParallelHandler;
 use hyper::{Body, Response};
 
 #[derive(Copy, Clone, Serialize)]
@@ -66,8 +67,14 @@ struct FixedWriterState {
     incremental: bool,
 }
 
-// key=digest, value=(length, existance checked)
-type KnownChunksMap = HashMap<[u8;32], (u32, bool)>;
+// key=digest, value=length
+type KnownChunksMap = HashMap<[u8;32], u32>;
+
+enum ValidateHandlerState {
+    NotInitialized,
+    NotNeeded,
+    Running(ParallelHandler<'static, [u8;32]>),
+}
 
 struct SharedBackupState {
     finished: bool,
@@ -78,6 +85,7 @@ struct SharedBackupState {
     known_chunks: KnownChunksMap,
     backup_size: u64, // sums up size of all files
     backup_stat: UploadStatistic,
+    validate_handler: ValidateHandlerState,
 }
 
 impl SharedBackupState {
@@ -131,6 +139,7 @@ impl BackupEnvironment {
             known_chunks: HashMap::new(),
             backup_size: 0,
             backup_stat: UploadStatistic::new(),
+            validate_handler: ValidateHandlerState::NotInitialized,
         };
 
         Self {
@@ -156,11 +165,89 @@ impl BackupEnvironment {
 
         state.ensure_unfinished()?;
 
-        state.known_chunks.insert(digest, (length, false));
+        state.known_chunks.insert(digest, length);
+
+        match &state.validate_handler {
+            ValidateHandlerState::NotInitialized => {
+                if self.last_backup_has_recent_verify()? {
+                    state.validate_handler = ValidateHandlerState::NotNeeded;
+                } else {
+                    let handler = self.start_validate_handler();
+                    handler.send(digest)?;
+                    state.validate_handler = ValidateHandlerState::Running(handler);
+                }
+            },
+            ValidateHandlerState::Running(handler) => {
+                handler.send(digest)?;
+            },
+            ValidateHandlerState::NotNeeded => {}
+        }
 
         Ok(())
     }
 
+    fn start_validate_handler(&self) -> ParallelHandler<'static, [u8;32]> {
+        let datastore = Arc::clone(&self.datastore);
+        let upid = Arc::new(self.worker.upid().clone());
+        let last_backup = Arc::new(self.last_backup.clone());
+        ParallelHandler::new(
+            "verify handler",
+            1, // one worker is enough, and means we don't need a lock to mark the prev snapshot
+            move |digest| {
+                Self::validate_chunk_existance(
+                    &digest,
+                    Arc::clone(&datastore),
+                    Arc::clone(&upid),
+                    Arc::clone(&last_backup)
+                )
+            },
+            false // don't block on send
+        )
+    }
+
+    fn validate_chunk_existance(
+        digest: &[u8;32],
+        datastore: Arc<DataStore>,
+        upid: Arc<UPID>,
+        last_backup: Arc<Option<BackupInfo>>,
+    ) -> Result<(), Error> {
+        if !datastore.chunk_path(digest).0.exists() {
+            // Chunk is missing, mark last snapshot (which references it) as "verify failed"
+            let mark_msg = if let Some(ref last_backup) = *last_backup {
+                let last_dir = &last_backup.backup_dir;
+                let verify_state = SnapshotVerifyState {
+                    state: VerifyState::Failed,
+                    upid: UPID::clone(upid.as_ref()),
+                };
+
+                let res = proxmox::try_block!{
+                    let (mut manifest, _) = datastore.load_manifest(last_dir)?;
+                    manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
+                    datastore.store_manifest(last_dir, serde_json::to_value(manifest)?)
+                };
+
+                if let Err(err) = res {
+                    format!("tried marking previous snapshot as bad, \
+                            but got error accessing manifest: {}", err)
+                } else {
+                    "marked previous snapshot as bad, please use \
+                    'verify' for a detailed check".to_owned()
+                }
+            } else {
+                "internal error: no base backup registered to mark invalid".to_owned()
+            };
+
+            bail!(
+                "chunk '{}' was attempted to be reused but doesn't exist - {}",
+                digest_to_hex(digest),
+                mark_msg
+            );
+        }
+
+        Ok(())
+    }
+
+
     /// Register fixed length chunks after upload.
     ///
     /// Like `register_chunk()`, but additionally record statistics for
@@ -176,6 +263,9 @@ impl BackupEnvironment {
         let mut state = self.state.lock().unwrap();
 
         state.ensure_unfinished()?;
+        if let ValidateHandlerState::Running(handler) = &state.validate_handler {
+            handler.check_abort()?;
+        }
 
         let mut data = match state.fixed_writers.get_mut(&wid) {
             Some(data) => data,
@@ -198,7 +288,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, (size, true));
+        state.known_chunks.insert(digest, size);
 
         Ok(())
     }
@@ -218,6 +308,9 @@ impl BackupEnvironment {
         let mut state = self.state.lock().unwrap();
 
         state.ensure_unfinished()?;
+        if let ValidateHandlerState::Running(handler) = &state.validate_handler {
+            handler.check_abort()?;
+        }
 
         let mut data = match state.dynamic_writers.get_mut(&wid) {
             Some(data) => data,
@@ -231,7 +324,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, (size, true));
+        state.known_chunks.insert(digest, size);
 
         Ok(())
     }
@@ -240,7 +333,7 @@ impl BackupEnvironment {
         let state = self.state.lock().unwrap();
 
         match state.known_chunks.get(digest) {
-            Some((len, _)) => Some(*len),
+            Some(len) => Some(*len),
             None => None,
         }
     }
@@ -483,47 +576,6 @@ impl BackupEnvironment {
         }
     }
 
-    /// Ensure all chunks referenced in this backup actually exist.
-    /// Only call *after* all writers have been closed, to avoid race with GC.
-    /// In case of error, mark the previous backup as 'verify failed'.
-    fn verify_chunk_existance(&self, known_chunks: &KnownChunksMap) -> Result<(), Error> {
-        for (digest, (_, checked)) in known_chunks.iter() {
-            if !checked && !self.datastore.chunk_path(digest).0.exists() {
-                let mark_msg = if let Some(ref last_backup) = self.last_backup {
-                    let last_dir = &last_backup.backup_dir;
-                    let verify_state = SnapshotVerifyState {
-                        state: VerifyState::Failed,
-                        upid: self.worker.upid().clone(),
-                    };
-
-                    let res = proxmox::try_block!{
-                        let (mut manifest, _) = self.datastore.load_manifest(last_dir)?;
-                        manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
-                        self.datastore.store_manifest(last_dir, serde_json::to_value(manifest)?)
-                    };
-
-                    if let Err(err) = res {
-                        format!("tried marking previous snapshot as bad, \
-                                but got error accessing manifest: {}", err)
-                    } else {
-                        "marked previous snapshot as bad, please use \
-                        'verify' for a detailed check".to_owned()
-                    }
-                } else {
-                    "internal error: no base backup registered to mark invalid".to_owned()
-                };
-
-                bail!(
-                    "chunk '{}' was attempted to be reused but doesn't exist - {}",
-                    digest_to_hex(digest),
-                    mark_msg
-                );
-            }
-        }
-
-        Ok(())
-    }
-
     /// Mark backup as finished
     pub fn finish_backup(&self) -> Result<(), Error> {
         let mut state = self.state.lock().unwrap();
@@ -560,8 +612,11 @@ impl BackupEnvironment {
             }
         }
 
-        if !self.last_backup_has_recent_verify()? {
-            self.verify_chunk_existance(&state.known_chunks)?;
+        // stop verify handler and verify remaining chunks
+        let handler = std::mem::replace(&mut state.validate_handler, ValidateHandlerState::NotInitialized);
+        if let ValidateHandlerState::Running(handler) = handler {
+            self.worker.log("waiting for validate thread to complete");
+            handler.complete()?;
         }
 
         // marks the backup as successful
-- 
2.20.1
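
One detail worth spelling out from the finish_backup() hunk: completing
the handler consumes it, but the state behind the mutex is only borrowed
mutably, so the handler is first swapped out of the state via
std::mem::replace. A stripped-down illustration of that idiom with
simplified types (a plain JoinHandle instead of the real ParallelHandler):

use std::thread::JoinHandle;

#[allow(dead_code)]
enum HandlerState {
    NotInitialized,
    NotNeeded,
    Running(JoinHandle<Result<(), String>>),
}

struct State {
    handler: HandlerState,
}

impl State {
    // Joining consumes the handle, but we only have `&mut self` on the
    // (normally mutex-guarded) state, so swap an inert variant into
    // place to take ownership of the running handler.
    fn finish(&mut self) -> Result<(), String> {
        let taken = std::mem::replace(&mut self.handler, HandlerState::NotInitialized);
        if let HandlerState::Running(handle) = taken {
            // surface both panics and validation errors
            handle.join().map_err(|_| "validate thread panicked".to_owned())??;
        }
        Ok(())
    }
}

fn main() -> Result<(), String> {
    let handle = std::thread::spawn(|| -> Result<(), String> { Ok(()) });
    let mut state = State { handler: HandlerState::Running(handle) };
    state.finish()
}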