public inbox for pbs-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [pbs-devel] [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one
@ 2020-07-30  7:09 Dominik Csapak
  2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 2/3] verify: keep also track of corrupt chunks Dominik Csapak
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Dominik Csapak @ 2020-07-30  7:09 UTC (permalink / raw)
  To: pbs-devel

this makes it easier to see which chunks are corrupt
(and enables us in the future to build a 'complete' list of
corrupt chunks)

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/backup/verify.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 33aaa621..9e309b12 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -42,6 +42,7 @@ fn verify_index_chunks(
     worker: &WorkerTask,
 ) -> Result<(), Error> {
 
+    let mut errors = 0;
     for pos in 0..index.index_count() {
 
         worker.fail_on_abort()?;
@@ -50,11 +51,19 @@ fn verify_index_chunks(
         let size = info.range.end - info.range.start;
 
         if !verified_chunks.contains(&info.digest) {
-            datastore.verify_stored_chunk(&info.digest, size)?;
-            verified_chunks.insert(info.digest);
+            if let Err(err) = datastore.verify_stored_chunk(&info.digest, size) {
+                worker.log(format!("{}", err));
+                errors += 1;
+            } else {
+                verified_chunks.insert(info.digest);
+            }
         }
     }
 
+    if errors > 0 {
+        bail!("chunks could not be verified");
+    }
+
     Ok(())
 }
 
-- 
2.20.1





^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] [PATCH proxmox-backup 2/3] verify: keep also track of corrupt chunks
  2020-07-30  7:09 [pbs-devel] [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dominik Csapak
@ 2020-07-30  7:09 ` Dominik Csapak
  2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 3/3] verify: keep track and log which dirs failed the verification Dominik Csapak
  2020-07-30  7:40 ` [pbs-devel] applied: [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dominik Csapak @ 2020-07-30  7:09 UTC (permalink / raw)
  To: pbs-devel

so that we do not have to verify a corrupt chunk multiple times

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/api2/admin/datastore.rs |  3 ++-
 src/backup/verify.rs        | 46 +++++++++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index c720231c..5e6d5720 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -476,7 +476,8 @@ pub fn verify(
         {
             let success = if let Some(backup_dir) = backup_dir {
                 let mut verified_chunks = HashSet::with_capacity(1024*16);
-                verify_backup_dir(&datastore, &backup_dir, &mut verified_chunks, &worker)?
+                let mut corrupt_chunks = HashSet::with_capacity(64);
+                verify_backup_dir(&datastore, &backup_dir, &mut verified_chunks, &mut corrupt_chunks, &worker)?
             } else if let Some(backup_group) = backup_group {
                 verify_backup_group(&datastore, &backup_group, &worker)?
             } else {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 9e309b12..58b91bc9 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -39,6 +39,7 @@ fn verify_index_chunks(
     datastore: &DataStore,
     index: Box<dyn IndexFile>,
     verified_chunks: &mut HashSet<[u8;32]>,
+    corrupt_chunks: &mut HashSet<[u8; 32]>,
     worker: &WorkerTask,
 ) -> Result<(), Error> {
 
@@ -51,11 +52,18 @@ fn verify_index_chunks(
         let size = info.range.end - info.range.start;
 
         if !verified_chunks.contains(&info.digest) {
-            if let Err(err) = datastore.verify_stored_chunk(&info.digest, size) {
-                worker.log(format!("{}", err));
-                errors += 1;
+            if !corrupt_chunks.contains(&info.digest) {
+                if let Err(err) = datastore.verify_stored_chunk(&info.digest, size) {
+                    corrupt_chunks.insert(info.digest);
+                    worker.log(format!("{}", err));
+                    errors += 1;
+                } else {
+                    verified_chunks.insert(info.digest);
+                }
             } else {
-                verified_chunks.insert(info.digest);
+                let digest_str = proxmox::tools::digest_to_hex(&info.digest);
+                worker.log(format!("chunk {} was marked as corrupt", digest_str));
+                errors += 1;
             }
         }
     }
@@ -72,6 +80,7 @@ fn verify_fixed_index(
     backup_dir: &BackupDir,
     info: &FileInfo,
     verified_chunks: &mut HashSet<[u8;32]>,
+    corrupt_chunks: &mut HashSet<[u8;32]>,
     worker: &WorkerTask,
 ) -> Result<(), Error> {
 
@@ -89,7 +98,7 @@ fn verify_fixed_index(
         bail!("wrong index checksum");
     }
 
-    verify_index_chunks(datastore, Box::new(index), verified_chunks, worker)
+    verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, worker)
 }
 
 fn verify_dynamic_index(
@@ -97,6 +106,7 @@ fn verify_dynamic_index(
     backup_dir: &BackupDir,
     info: &FileInfo,
     verified_chunks: &mut HashSet<[u8;32]>,
+    corrupt_chunks: &mut HashSet<[u8;32]>,
     worker: &WorkerTask,
 ) -> Result<(), Error> {
 
@@ -114,7 +124,7 @@ fn verify_dynamic_index(
         bail!("wrong index checksum");
     }
 
-    verify_index_chunks(datastore, Box::new(index), verified_chunks, worker)
+    verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, worker)
 }
 
 /// Verify a single backup snapshot
@@ -130,6 +140,7 @@ pub fn verify_backup_dir(
     datastore: &DataStore,
     backup_dir: &BackupDir,
     verified_chunks: &mut HashSet<[u8;32]>,
+    corrupt_chunks: &mut HashSet<[u8;32]>,
     worker: &WorkerTask
 ) -> Result<bool, Error> {
 
@@ -149,8 +160,24 @@ pub fn verify_backup_dir(
         let result = proxmox::try_block!({
             worker.log(format!("  check {}", info.filename));
             match archive_type(&info.filename)? {
-                ArchiveType::FixedIndex => verify_fixed_index(&datastore, &backup_dir, info, verified_chunks, worker),
-                ArchiveType::DynamicIndex => verify_dynamic_index(&datastore, &backup_dir, info, verified_chunks, worker),
+                ArchiveType::FixedIndex =>
+                    verify_fixed_index(
+                        &datastore,
+                        &backup_dir,
+                        info,
+                        verified_chunks,
+                        corrupt_chunks,
+                        worker
+                    ),
+                ArchiveType::DynamicIndex =>
+                    verify_dynamic_index(
+                        &datastore,
+                        &backup_dir,
+                        info,
+                        verified_chunks,
+                        corrupt_chunks,
+                        worker
+                    ),
                 ArchiveType::Blob => verify_blob(&datastore, &backup_dir, info),
             }
         });
@@ -189,10 +216,11 @@ pub fn verify_backup_group(datastore: &DataStore, group: &BackupGroup, worker: &
     let mut error_count = 0;
 
     let mut verified_chunks = HashSet::with_capacity(1024*16); // start with 16384 chunks (up to 65GB)
+    let mut corrupt_chunks = HashSet::with_capacity(64); // start with 64 chunks since we assume there are few corrupt ones
 
     BackupInfo::sort_list(&mut list, false); // newest first
     for info in list {
-        if !verify_backup_dir(datastore, &info.backup_dir, &mut verified_chunks, worker)? {
+        if !verify_backup_dir(datastore, &info.backup_dir, &mut verified_chunks, &mut corrupt_chunks, worker)?{
             error_count += 1;
         }
     }
-- 
2.20.1





^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] [PATCH proxmox-backup 3/3] verify: keep track and log which dirs failed the verification
  2020-07-30  7:09 [pbs-devel] [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dominik Csapak
  2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 2/3] verify: keep also track of corrupt chunks Dominik Csapak
@ 2020-07-30  7:09 ` Dominik Csapak
  2020-07-30  7:40 ` [pbs-devel] applied: [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dominik Csapak @ 2020-07-30  7:09 UTC (permalink / raw)
  To: pbs-devel

so that, at the end of the worker task, we can print a list of
which backups are corrupt.

this is useful if there are many snapshots and some of them had an
error. Before this patch, the task log simply said to 'look in the logs',
but if the log is very long, it is hard to see what exactly failed.

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
 src/api2/admin/datastore.rs | 14 +++++++++++---
 src/backup/verify.rs        | 31 ++++++++++++++-----------------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index 5e6d5720..29e74bd6 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -474,16 +474,24 @@ pub fn verify(
     let upid_str = WorkerTask::new_thread(
         "verify", Some(worker_id.clone()), &username, to_stdout, move |worker|
         {
-            let success = if let Some(backup_dir) = backup_dir {
+            let failed_dirs = if let Some(backup_dir) = backup_dir {
                 let mut verified_chunks = HashSet::with_capacity(1024*16);
                 let mut corrupt_chunks = HashSet::with_capacity(64);
-                verify_backup_dir(&datastore, &backup_dir, &mut verified_chunks, &mut corrupt_chunks, &worker)?
+                let mut res = Vec::new();
+                if !verify_backup_dir(&datastore, &backup_dir, &mut verified_chunks, &mut corrupt_chunks, &worker)? {
+                    res.push(backup_dir.to_string());
+                }
+                res
             } else if let Some(backup_group) = backup_group {
                 verify_backup_group(&datastore, &backup_group, &worker)?
             } else {
                 verify_all_backups(&datastore, &worker)?
             };
-            if !success {
+            if failed_dirs.len() > 0 {
+                worker.log("Failed to verify following snapshots:");
+                for dir in failed_dirs {
+                    worker.log(format!("\t{}", dir));
+                }
                 bail!("verfication failed - please check the log for details");
             }
             Ok(())
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 58b91bc9..cba1297f 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -198,34 +198,32 @@ pub fn verify_backup_dir(
 /// Errors are logged to the worker log.
 ///
 /// Returns
-/// - Ok(true) if verify is successful
-/// - Ok(false) if there were verification errors
+/// - Ok(failed_dirs) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_backup_group(datastore: &DataStore, group: &BackupGroup, worker: &WorkerTask) -> Result<bool, Error> {
+pub fn verify_backup_group(datastore: &DataStore, group: &BackupGroup, worker: &WorkerTask) -> Result<Vec<String>, Error> {
 
+    let mut errors = Vec::new();
     let mut list = match group.list_backups(&datastore.base_path()) {
         Ok(list) => list,
         Err(err) => {
             worker.log(format!("verify group {}:{} - unable to list backups: {}", datastore.name(), group, err));
-            return Ok(false);
+            return Ok(errors);
         }
     };
 
     worker.log(format!("verify group {}:{}", datastore.name(), group));
 
-    let mut error_count = 0;
-
     let mut verified_chunks = HashSet::with_capacity(1024*16); // start with 16384 chunks (up to 65GB)
     let mut corrupt_chunks = HashSet::with_capacity(64); // start with 64 chunks since we assume there are few corrupt ones
 
     BackupInfo::sort_list(&mut list, false); // newest first
     for info in list {
         if !verify_backup_dir(datastore, &info.backup_dir, &mut verified_chunks, &mut corrupt_chunks, worker)?{
-            error_count += 1;
+            errors.push(info.backup_dir.to_string());
         }
     }
 
-    Ok(error_count == 0)
+    Ok(errors)
 }
 
 /// Verify all backups inside a datastore
@@ -233,27 +231,26 @@ pub fn verify_backup_group(datastore: &DataStore, group: &BackupGroup, worker: &
 /// Errors are logged to the worker log.
 ///
 /// Returns
-/// - Ok(true) if verify is successful
-/// - Ok(false) if there were verification errors
+/// - Ok(failed_dirs) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_all_backups(datastore: &DataStore, worker: &WorkerTask) -> Result<bool, Error> {
+pub fn verify_all_backups(datastore: &DataStore, worker: &WorkerTask) -> Result<Vec<String>, Error> {
+
+    let mut errors = Vec::new();
 
     let list = match BackupGroup::list_groups(&datastore.base_path()) {
         Ok(list) => list,
         Err(err) => {
             worker.log(format!("verify datastore {} - unable to list backups: {}", datastore.name(), err));
-            return Ok(false);
+            return Ok(errors);
         }
     };
 
     worker.log(format!("verify datastore {}", datastore.name()));
 
-    let mut error_count = 0;
     for group in list {
-        if !verify_backup_group(datastore, &group, worker)? {
-            error_count += 1;
-        }
+        let mut group_errors = verify_backup_group(datastore, &group, worker)?;
+        errors.append(&mut group_errors);
     }
 
-    Ok(error_count == 0)
+    Ok(errors)
 }
-- 
2.20.1





^ permalink raw reply	[flat|nested] 4+ messages in thread

* [pbs-devel] applied: [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one
  2020-07-30  7:09 [pbs-devel] [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dominik Csapak
  2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 2/3] verify: keep also track of corrupt chunks Dominik Csapak
  2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 3/3] verify: keep track and log which dirs failed the verification Dominik Csapak
@ 2020-07-30  7:40 ` Dietmar Maurer
  2 siblings, 0 replies; 4+ messages in thread
From: Dietmar Maurer @ 2020-07-30  7:40 UTC (permalink / raw)
  To: Proxmox Backup Server development discussion, Dominik Csapak

applied all three patches

> On 07/30/2020 9:09 AM Dominik Csapak <d.csapak@proxmox.com> wrote:
> 
>  
> this makes it easier to see which chunks are corrupt
> (and enables us in the future to build a 'complete' list of
> corrupt chunks)
> 
> Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
> ---
>  src/backup/verify.rs | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/src/backup/verify.rs b/src/backup/verify.rs
> index 33aaa621..9e309b12 100644
> --- a/src/backup/verify.rs
> +++ b/src/backup/verify.rs
> @@ -42,6 +42,7 @@ fn verify_index_chunks(
>      worker: &WorkerTask,
>  ) -> Result<(), Error> {
>  
> +    let mut errors = 0;
>      for pos in 0..index.index_count() {
>  
>          worker.fail_on_abort()?;
> @@ -50,11 +51,19 @@ fn verify_index_chunks(
>          let size = info.range.end - info.range.start;
>  
>          if !verified_chunks.contains(&info.digest) {
> -            datastore.verify_stored_chunk(&info.digest, size)?;
> -            verified_chunks.insert(info.digest);
> +            if let Err(err) = datastore.verify_stored_chunk(&info.digest, size) {
> +                worker.log(format!("{}", err));
> +                errors += 1;
> +            } else {
> +                verified_chunks.insert(info.digest);
> +            }
>          }
>      }
>  
> +    if errors > 0 {
> +        bail!("chunks could not be verified");
> +    }
> +
>      Ok(())
>  }
>  
> -- 
> 2.20.1
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel@lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel




^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-07-30  7:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-30  7:09 [pbs-devel] [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dominik Csapak
2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 2/3] verify: keep also track of corrupt chunks Dominik Csapak
2020-07-30  7:09 ` [pbs-devel] [PATCH proxmox-backup 3/3] verify: keep track and log which dirs failed the verification Dominik Csapak
2020-07-30  7:40 ` [pbs-devel] applied: [PATCH proxmox-backup 1/3] verify: check all chunks of an index, even if we encounter a corrupt one Dietmar Maurer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal