From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id F1BA06916A for ; Thu, 12 Nov 2020 15:50:16 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E576311E7A for ; Thu, 12 Nov 2020 15:50:16 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [212.186.127.180]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 9D14911E6F for ; Thu, 12 Nov 2020 15:50:15 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 6C01F40A7C for ; Thu, 12 Nov 2020 15:50:15 +0100 (CET) From: Stefan Reiter To: pbs-devel@lists.proxmox.com Date: Thu, 12 Nov 2020 15:50:08 +0100 Message-Id: <20201112145008.29363-1-s.reiter@proxmox.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.036 Adjusted score from AWL reputation of From: address KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_DNSWL_MED -2.3 Sender listed at https://www.dnswl.org/, medium trust SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [datastore.rs] Subject: [pbs-devel] [PATCH proxmox-backup] gc: treat .bad files like regular chunks X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 12 Nov 2020 14:50:17 -0000 Simplify the phase 2 code by treating .bad files just like regular chunks, with the exception of stat logging. To facilitate, we need to touch .bad files in phase 1. We only do this under the condition that 1) the original chunk is missing (as before), and 2) the original chunk is still referenced somewhere (since the code lives in the error handler for a failed chunk touch, it only gets called for chunks we expect to be there, i.e. ones that are referenced). Untouched they will then be cleaned up after 24 hours (or after the last longer-running task finishes). Reason 2) is also a fix for .bad files not being cleaned up at all if the original is no longer referenced anywhere (e.g. a user deleting all snapshots after seeing some corrupt chunks appear). cond_touch_path is introduced to touch arbitrary paths in the chunk store with the same logic as touching chunks. Signed-off-by: Stefan Reiter --- src/backup/chunk_store.rs | 68 +++++++++++++-------------------------- src/backup/datastore.rs | 11 +++++++ 2 files changed, 33 insertions(+), 46 deletions(-) diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs index b7556f8a..59719bad 100644 --- a/src/backup/chunk_store.rs +++ b/src/backup/chunk_store.rs @@ -154,9 +154,11 @@ impl ChunkStore { } pub fn cond_touch_chunk(&self, digest: &[u8; 32], fail_if_not_exist: bool) -> Result { - let (chunk_path, _digest_str) = self.chunk_path(digest); + self.cond_touch_path(&chunk_path, fail_if_not_exist) + } + pub fn cond_touch_path(&self, path: &Path, fail_if_not_exist: bool) -> Result { const UTIME_NOW: i64 = (1 << 30) - 1; const UTIME_OMIT: i64 = (1 << 30) - 2; @@ -167,7 +169,7 @@ impl ChunkStore { use nix::NixPath; - let res = chunk_path.with_nix_path(|cstr| unsafe { + let res = path.with_nix_path(|cstr| unsafe { let tmp = libc::utimensat(-1, cstr.as_ptr(), ×[0], libc::AT_SYMLINK_NOFOLLOW); nix::errno::Errno::result(tmp) })?; @@ -177,7 +179,7 @@ impl ChunkStore { return Ok(false); } - bail!("update atime failed for chunk {:?} - {}", chunk_path, err); + bail!("update atime failed for chunk/file {:?} - {}", path, err); } Ok(true) @@ -328,49 +330,13 @@ impl ChunkStore { let lock = self.mutex.lock(); if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { - if bad { - // filename validity checked in iterator - let orig_filename = std::ffi::CString::new(&filename.to_bytes()[..64])?; - match fstatat( - dirfd, - orig_filename.as_c_str(), - nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) - { - Ok(_) => { - match unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { - Err(err) => - crate::task_warn!( - worker, - "unlinking corrupt chunk {:?} failed on store '{}' - {}", - filename, - self.name, - err, - ), - Ok(_) => { - status.removed_bad += 1; - status.removed_bytes += stat.st_size as u64; - } - } - }, - Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => { - // chunk hasn't been rewritten yet, keep .bad file - status.still_bad += 1; - }, - Err(err) => { - // some other error, warn user and keep .bad file around too - status.still_bad += 1; - crate::task_warn!( - worker, - "error during stat on '{:?}' - {}", - orig_filename, - err, - ); - } - } - } else if stat.st_atime < min_atime { + if stat.st_atime < min_atime { //let age = now - stat.st_atime; //println!("UNLINK {} {:?}", age/(3600*24), filename); if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) { + if bad { + status.still_bad += 1; + } bail!( "unlinking chunk {:?} failed on store '{}' - {}", filename, @@ -378,13 +344,23 @@ impl ChunkStore { err, ); } - status.removed_chunks += 1; + if bad { + status.removed_bad += 1; + } else { + status.removed_chunks += 1; + } status.removed_bytes += stat.st_size as u64; } else if stat.st_atime < oldest_writer { - status.pending_chunks += 1; + if bad { + status.still_bad += 1; + } else { + status.pending_chunks += 1; + } status.pending_bytes += stat.st_size as u64; } else { - status.disk_chunks += 1; + if !bad { + status.disk_chunks += 1; + } status.disk_bytes += stat.st_size as u64; } } diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs index 6d759e78..19efc23f 100644 --- a/src/backup/datastore.rs +++ b/src/backup/datastore.rs @@ -446,6 +446,17 @@ impl DataStore { file_name, err, ); + + // touch any corresponding .bad files to keep them around, meaning if a chunk is + // rewritten correctly they will be removed automatically, as well as if no index + // file requires the chunk anymore (won't get to this loop then) + for i in 0..=9 { + let bad_ext = format!("{}.bad", i); + let mut bad_path = PathBuf::new(); + bad_path.push(self.chunk_path(digest).0); + bad_path.set_extension(bad_ext); + self.chunk_store.cond_touch_path(&bad_path, false)?; + } } } Ok(()) -- 2.20.1