all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup 3/4] gc: remove .bad files on garbage collect
Date: Thu,  3 Sep 2020 16:17:04 +0200	[thread overview]
Message-ID: <20200903141705.6344-4-s.reiter@proxmox.com> (raw)
In-Reply-To: <20200903141705.6344-1-s.reiter@proxmox.com>

The iterator returned by get_chunk_iterator is extended with a third
tuple element indicating whether the current file is a chunk (false) or
a .bad file (true).

Count the sizes of removed .bad files towards the total of removed
bytes, since removing them also frees disk space.

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---
 src/api2/types/mod.rs     |  3 +++
 src/backup/chunk_store.rs | 43 ++++++++++++++++++++++++++++-----------
 src/backup/datastore.rs   |  5 ++++-
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/src/api2/types/mod.rs b/src/api2/types/mod.rs
index 6854fdf0..e29a7e37 100644
--- a/src/api2/types/mod.rs
+++ b/src/api2/types/mod.rs
@@ -559,6 +559,8 @@ pub struct GarbageCollectionStatus {
     pub pending_bytes: u64,
     /// Number of pending chunks (pending removal - kept for safety).
     pub pending_chunks: usize,
+    /// Number of chunks marked as .bad by verify that have been removed by GC.
+    pub removed_bad: usize,
 }
 
 impl Default for GarbageCollectionStatus {
@@ -573,6 +575,7 @@ impl Default for GarbageCollectionStatus {
             removed_chunks: 0,
             pending_bytes: 0,
             pending_chunks: 0,
+            removed_bad: 0,
         }
     }
 }
diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs
index e1da5a8a..5c2fb29d 100644
--- a/src/backup/chunk_store.rs
+++ b/src/backup/chunk_store.rs
@@ -187,7 +187,7 @@ impl ChunkStore {
     pub fn get_chunk_iterator(
         &self,
     ) -> Result<
-        impl Iterator<Item = (Result<tools::fs::ReadDirEntry, Error>, usize)> + std::iter::FusedIterator,
+        impl Iterator<Item = (Result<tools::fs::ReadDirEntry, Error>, usize, bool)> + std::iter::FusedIterator,
         Error
     > {
         use nix::dir::Dir;
@@ -218,20 +218,26 @@ impl ChunkStore {
                     match inner.next() {
                         Some(Ok(entry)) => {
                             // skip files if they're not a hash
-                            let bytes = entry.file_name().to_bytes();
-                            if bytes.len() != 64 {
-                                continue;
+                            let hash = {
+                                let bytes = entry.file_name().to_bytes();
+                                bytes.len() == 64 && bytes.iter().all(u8::is_ascii_hexdigit)
+                            };
+
+                            if hash {
+                                return Some((Ok(entry), percentage, false));
+                            } else if let Ok(name) = entry.file_name().to_str() {
+                                if name.ends_with(".bad") {
+                                    return Some((Ok(entry), percentage, true));
+                                }
                             }
-                            if !bytes.iter().all(u8::is_ascii_hexdigit) {
-                                continue;
-                            }
-                            return Some((Ok(entry), percentage));
+
+                            continue;
                         }
                         Some(Err(err)) => {
                             // stop after first error
                             done = true;
                             // and pass the error through:
-                            return Some((Err(err), percentage));
+                            return Some((Err(err), percentage, false));
                         }
                         None => (), // open next directory
                     }
@@ -261,7 +267,7 @@ impl ChunkStore {
                         // other errors are fatal, so end our iteration
                         done = true;
                         // and pass the error through:
-                        return Some((Err(format_err!("unable to read subdir '{}' - {}", subdir, err)), percentage));
+                        return Some((Err(format_err!("unable to read subdir '{}' - {}", subdir, err)), percentage, false));
                     }
                 }
             }
@@ -292,7 +298,7 @@ impl ChunkStore {
         let mut last_percentage = 0;
         let mut chunk_count = 0;
 
-        for (entry, percentage) in self.get_chunk_iterator()? {
+        for (entry, percentage, bad) in self.get_chunk_iterator()? {
             if last_percentage != percentage {
                 last_percentage = percentage;
                 worker.log(format!("percentage done: phase2 {}% (processed {} chunks)", percentage, chunk_count));
@@ -321,7 +327,20 @@ impl ChunkStore {
             let lock = self.mutex.lock();
 
             if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
-                if stat.st_atime < min_atime {
+                if bad {
+                    let res = unsafe { libc::unlinkat(dirfd, filename.as_ptr(), 0) };
+                    if res != 0 {
+                        let err = nix::Error::last();
+                        worker.warn(format!(
+                            "unlink .bad file {:?} failed on store '{}' - {}",
+                            filename,
+                            self.name,
+                            err,
+                        ));
+                    }
+                    status.removed_bad += 1;
+                    status.removed_bytes += stat.st_size as u64;
+                } else if stat.st_atime < min_atime {
                     //let age = now - stat.st_atime;
                     //println!("UNLINK {}  {:?}", age/(3600*24), filename);
                     let res = unsafe { libc::unlinkat(dirfd, filename.as_ptr(), 0) };
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index 42866e38..ebe47487 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -85,7 +85,7 @@ impl DataStore {
     pub fn get_chunk_iterator(
         &self,
     ) -> Result<
-        impl Iterator<Item = (Result<tools::fs::ReadDirEntry, Error>, usize)>,
+        impl Iterator<Item = (Result<tools::fs::ReadDirEntry, Error>, usize, bool)>,
         Error
     > {
         self.chunk_store.get_chunk_iterator()
@@ -495,6 +495,9 @@ impl DataStore {
             if gc_status.pending_bytes > 0 {
                 worker.log(&format!("Pending removals: {} (in {} chunks)", HumanByte::from(gc_status.pending_bytes), gc_status.pending_chunks));
             }
+            if gc_status.removed_bad > 0 {
+                worker.log(&format!("Removed bad files: {}", gc_status.removed_bad));
+            }
 
             worker.log(&format!("Original data usage: {}", HumanByte::from(gc_status.index_data_bytes)));
 
-- 
2.20.1





  parent reply	other threads:[~2020-09-03 14:17 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-03 14:17 [pbs-devel] [PATCH 0/4] Improve corrupt chunk handling Stefan Reiter
2020-09-03 14:17 ` [pbs-devel] [PATCH proxmox-backup 1/4] verify: fix log units Stefan Reiter
2020-09-03 14:17 ` [pbs-devel] [PATCH proxmox-backup 2/4] verify: rename corrupted chunks with .bad extension Stefan Reiter
2020-09-03 14:17 ` Stefan Reiter [this message]
2020-09-04 12:20   ` [pbs-devel] [PATCH proxmox-backup 3/4] gc: remove .bad files on garbage collect Thomas Lamprecht
2020-09-03 14:17 ` [pbs-devel] [PATCH proxmox-backup 4/4] backup: check all referenced chunks actually exist Stefan Reiter
2020-09-03 15:40 ` [pbs-devel] [PATCH 0/4] Improve corrupt chunk handling Dietmar Maurer
2020-09-03 15:51   ` Dietmar Maurer
2020-09-07  9:31     ` Stefan Reiter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200903141705.6344-4-s.reiter@proxmox.com \
    --to=s.reiter@proxmox.com \
    --cc=pbs-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal