From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id D15A660C7B for ; Wed, 18 Nov 2020 14:28:59 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C7D431C1B0 for ; Wed, 18 Nov 2020 14:28:59 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 75D491C1A3 for ; Wed, 18 Nov 2020 14:28:58 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 3997743980 for ; Wed, 18 Nov 2020 14:28:58 +0100 (CET) Date: Wed, 18 Nov 2020 14:28:56 +0100 From: Wolfgang Bumiller To: Stefan Reiter Cc: pbs-devel@lists.proxmox.com Message-ID: <20201118132856.2j4td2fosa3luier@wobu-vie.proxmox.com> References: <20201112145008.29363-1-s.reiter@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20201112145008.29363-1-s.reiter@proxmox.com> User-Agent: NeoMutt/20180716 X-SPAM-LEVEL: Spam detection results: 0 AWL -1.141 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] applied: [PATCH proxmox-backup] gc: treat .bad files like regular chunks X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 18 Nov 2020 13:28:59 -0000 applied On Thu, Nov 12, 2020 at 03:50:08PM +0100, Stefan Reiter wrote: > Simplify the phase 2 code by treating .bad files just like regular > chunks, with the exception of stat logging. > > To facilitate, we need to touch .bad files in phase 1. We only do this > under the condition that 1) the original chunk is missing (as before), > and 2) the original chunk is still referenced somewhere (since the code > lives in the error handler for a failed chunk touch, it only gets called > for chunks we expect to be there, i.e. ones that are referenced). > > Untouched they will then be cleaned up after 24 hours (or after the last > longer-running task finishes). > > Reason 2) is also a fix for .bad files not being cleaned up at all if > the original is no longer referenced anywhere (e.g. a user deleting all > snapshots after seeing some corrupt chunks appear). > > cond_touch_path is introduced to touch arbitrary paths in the chunk > store with the same logic as touching chunks. > > Signed-off-by: Stefan Reiter > --- > src/backup/chunk_store.rs | 68 +++++++++++++-------------------------- > src/backup/datastore.rs | 11 +++++++ > 2 files changed, 33 insertions(+), 46 deletions(-) > > diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs > index b7556f8a..59719bad 100644 > --- a/src/backup/chunk_store.rs > +++ b/src/backup/chunk_store.rs > @@ -154,9 +154,11 @@ impl ChunkStore { > } > > pub fn cond_touch_chunk(&self, digest: &[u8; 32], fail_if_not_exist: bool) -> Result { > - > let (chunk_path, _digest_str) = self.chunk_path(digest); > + self.cond_touch_path(&chunk_path, fail_if_not_exist) > + } > > + pub fn cond_touch_path(&self, path: &Path, fail_if_not_exist: bool) -> Result { > const UTIME_NOW: i64 = (1 << 30) - 1; > const UTIME_OMIT: i64 = (1 << 30) - 2; > > @@ -167,7 +169,7 @@ impl ChunkStore { > > use nix::NixPath; > > - let res = chunk_path.with_nix_path(|cstr| unsafe { > + let res = path.with_nix_path(|cstr| unsafe { > let tmp = libc::utimensat(-1, cstr.as_ptr(), ×[0], libc::AT_SYMLINK_NOFOLLOW); > nix::errno::Errno::result(tmp) > })?; > @@ -177,7 +179,7 @@ impl ChunkStore { > return Ok(false); > } > > - bail!("update atime failed for chunk {:?} - {}", chunk_path, err); > + bail!("update atime failed for chunk/file {:?} - {}", path, err); > } > > Ok(true) > @@ -328,49 +330,13 @@ impl ChunkStore { > let lock = self.mutex.lock(); > > if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { > - if bad { > - // filename validity checked in iterator > - let orig_filename = std::ffi::CString::new(&filename.to_bytes()[..64])?; > - match fstatat( > - dirfd, > - orig_filename.as_c_str(), > - nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) > - { > - Ok(_) => { > - match unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { > - Err(err) => > - crate::task_warn!( > - worker, > - "unlinking corrupt chunk {:?} failed on store '{}' - {}", > - filename, > - self.name, > - err, > - ), > - Ok(_) => { > - status.removed_bad += 1; > - status.removed_bytes += stat.st_size as u64; > - } > - } > - }, > - Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => { > - // chunk hasn't been rewritten yet, keep .bad file > - status.still_bad += 1; > - }, > - Err(err) => { > - // some other error, warn user and keep .bad file around too > - status.still_bad += 1; > - crate::task_warn!( > - worker, > - "error during stat on '{:?}' - {}", > - orig_filename, > - err, > - ); > - } > - } > - } else if stat.st_atime < min_atime { > + if stat.st_atime < min_atime { > //let age = now - stat.st_atime; > //println!("UNLINK {} {:?}", age/(3600*24), filename); > if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { > + if bad { > + status.still_bad += 1; > + } > bail!( > "unlinking chunk {:?} failed on store '{}' - {}", > filename, > @@ -378,13 +344,23 @@ impl ChunkStore { > err, > ); > } > - status.removed_chunks += 1; > + if bad { > + status.removed_bad += 1; > + } else { > + status.removed_chunks += 1; > + } > status.removed_bytes += stat.st_size as u64; > } else if stat.st_atime < oldest_writer { > - status.pending_chunks += 1; > + if bad { > + status.still_bad += 1; > + } else { > + status.pending_chunks += 1; > + } > status.pending_bytes += stat.st_size as u64; > } else { > - status.disk_chunks += 1; > + if !bad { > + status.disk_chunks += 1; > + } > status.disk_bytes += stat.st_size as u64; > } > } > diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs > index 6d759e78..19efc23f 100644 > --- a/src/backup/datastore.rs > +++ b/src/backup/datastore.rs > @@ -446,6 +446,17 @@ impl DataStore { > file_name, > err, > ); > + > + // touch any corresponding .bad files to keep them around, meaning if a chunk is > + // rewritten correctly they will be removed automatically, as well as if no index > + // file requires the chunk anymore (won't get to this loop then) > + for i in 0..=9 { > + let bad_ext = format!("{}.bad", i); > + let mut bad_path = PathBuf::new(); > + bad_path.push(self.chunk_path(digest).0); > + bad_path.set_extension(bad_ext); > + self.chunk_store.cond_touch_path(&bad_path, false)?; > + } > } > } > Ok(()) > -- > 2.20.1