From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 3257661B68 for ; Mon, 7 Sep 2020 17:30:48 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 2FF89D674 for ; Mon, 7 Sep 2020 17:30:48 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 2F1DAD60A for ; Mon, 7 Sep 2020 17:30:45 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id F0A6E44A86 for ; Mon, 7 Sep 2020 17:30:44 +0200 (CEST) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Mon, 7 Sep 2020 17:30:34 +0200 Message-Id: <20200907153036.9324-4-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200907153036.9324-1-s.reiter@proxmox.com> References: <20200907153036.9324-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.053 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH v2 proxmox-backup 3/5] gc: remove .bad files on garbage collect X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 07 Sep 2020 15:30:48 -0000 The iterator of get_chunk_iterator is extended with a third parameter indicating whether the current file is a chunk (false) or a .bad file (true). Count their sizes to the total of removed bytes, since it also frees disk space. .bad files are only deleted if the corresponding chunk exists, i.e. has been rewritten. Otherwise we might delete data only marked bad because of transient errors. While at it, also clean up and use nix::unistd::unlinkat instead of unsafe libc calls. Signed-off-by: Stefan Reiter --- v2: * only remove .bad files if corresponding chunk exists * only consider .bad files that start with a valid digest * improve log messages * use safe unlinkat * only count actually removed chunks src/api2/types/mod.rs | 3 ++ src/backup/chunk_store.rs | 72 ++++++++++++++++++++++++++++++++------- src/backup/datastore.rs | 5 ++- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/api2/types/mod.rs b/src/api2/types/mod.rs index 6854fdf0..e29a7e37 100644 --- a/src/api2/types/mod.rs +++ b/src/api2/types/mod.rs @@ -559,6 +559,8 @@ pub struct GarbageCollectionStatus { pub pending_bytes: u64, /// Number of pending chunks (pending removal - kept for safety). pub pending_chunks: usize, + /// Number of chunks marked as .bad by verify that have been removed by GC. + pub removed_bad: usize, } impl Default for GarbageCollectionStatus { @@ -573,6 +575,7 @@ impl Default for GarbageCollectionStatus { removed_chunks: 0, pending_bytes: 0, pending_chunks: 0, + removed_bad: 0, } } } diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs index e1da5a8a..abe1d67f 100644 --- a/src/backup/chunk_store.rs +++ b/src/backup/chunk_store.rs @@ -187,7 +187,7 @@ impl ChunkStore { pub fn get_chunk_iterator( &self, ) -> Result< - impl Iterator, usize)> + std::iter::FusedIterator, + impl Iterator, usize, bool)> + std::iter::FusedIterator, Error > { use nix::dir::Dir; @@ -219,19 +219,21 @@ impl ChunkStore { Some(Ok(entry)) => { // skip files if they're not a hash let bytes = entry.file_name().to_bytes(); - if bytes.len() != 64 { + if bytes.len() != 64 && bytes.len() != 64 + ".0.bad".len() { continue; } - if !bytes.iter().all(u8::is_ascii_hexdigit) { + if !bytes.iter().take(64).all(u8::is_ascii_hexdigit) { continue; } - return Some((Ok(entry), percentage)); + + let bad = bytes.ends_with(".bad".as_bytes()); + return Some((Ok(entry), percentage, bad)); } Some(Err(err)) => { // stop after first error done = true; // and pass the error through: - return Some((Err(err), percentage)); + return Some((Err(err), percentage, false)); } None => (), // open next directory } @@ -261,7 +263,7 @@ impl ChunkStore { // other errors are fatal, so end our iteration done = true; // and pass the error through: - return Some((Err(format_err!("unable to read subdir '{}' - {}", subdir, err)), percentage)); + return Some((Err(format_err!("unable to read subdir '{}' - {}", subdir, err)), percentage, false)); } } } @@ -280,6 +282,7 @@ impl ChunkStore { worker: &WorkerTask, ) -> Result<(), Error> { use nix::sys::stat::fstatat; + use nix::unistd::{unlinkat, UnlinkatFlags}; let mut min_atime = phase1_start_time - 3600*24; // at least 24h (see mount option relatime) @@ -292,7 +295,7 @@ impl ChunkStore { let mut last_percentage = 0; let mut chunk_count = 0; - for (entry, percentage) in self.get_chunk_iterator()? { + for (entry, percentage, bad) in self.get_chunk_iterator()? { if last_percentage != percentage { last_percentage = percentage; worker.log(format!("percentage done: phase2 {}% (processed {} chunks)", percentage, chunk_count)); @@ -321,14 +324,59 @@ impl ChunkStore { let lock = self.mutex.lock(); if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { - if stat.st_atime < min_atime { + if bad { + match std::ffi::CString::new(&filename.to_bytes()[..64]) { + Ok(orig_filename) => { + match fstatat( + dirfd, + orig_filename.as_c_str(), + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) + { + Ok(_) => { /* do nothing */ }, + Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => { + // chunk hasn't been rewritten yet, keep + // .bad file around for manual recovery + continue; + }, + Err(err) => { + // some other error, warn user and keep + // .bad file around too + worker.warn(format!( + "error during stat on '{:?}' - {}", + orig_filename, + err, + )); + continue; + } + } + }, + Err(err) => { + worker.warn(format!( + "could not get original filename from .bad file '{:?}' - {}", + filename, + err, + )); + continue; + } + } + + if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { + worker.warn(format!( + "unlinking corrupt chunk {:?} failed on store '{}' - {}", + filename, + self.name, + err, + )); + } else { + status.removed_bad += 1; + status.removed_bytes += stat.st_size as u64; + } + } else if stat.st_atime < min_atime { //let age = now - stat.st_atime; //println!("UNLINK {} {:?}", age/(3600*24), filename); - let res = unsafe { libc::unlinkat(dirfd, filename.as_ptr(), 0) }; - if res != 0 { - let err = nix::Error::last(); + if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { bail!( - "unlink chunk {:?} failed on store '{}' - {}", + "unlinking chunk {:?} failed on store '{}' - {}", filename, self.name, err, diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index 42866e38..ebe47487 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -85,7 +85,7 @@ impl DataStore { pub fn get_chunk_iterator( &self, ) -> Result< - impl Iterator, usize)>, + impl Iterator, usize, bool)>, Error > { self.chunk_store.get_chunk_iterator() @@ -495,6 +495,9 @@ impl DataStore { if gc_status.pending_bytes > 0 { worker.log(&format!("Pending removals: {} (in {} chunks)", HumanByte::from(gc_status.pending_bytes), gc_status.pending_chunks)); } + if gc_status.removed_bad > 0 { + worker.log(&format!("Removed bad files: {}", gc_status.removed_bad)); + } worker.log(&format!("Original data usage: {}", HumanByte::from(gc_status.index_data_bytes))); -- 2.20.1