From: Thomas Lamprecht <t.lamprecht@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 1 Oct 2020 12:12:57 +0200
Message-Id: <20201001101257.28881-1-t.lamprecht@proxmox.com>
X-Mailer: git-send-email 2.27.0
Subject: [pbs-devel] applied: [PATCH backup] assume correct backup, avoid verifying chunk existance

This can slow things down by a lot on setups with (relatively) high seek
time, in the order of doubling the backup times if the cache isn't
populated with the last backup's chunk inode info.

Effectively, there is nothing known in the codebase that this check
protects us from. The only case that was theorized about is a really
long-running backup job (over 24 hours) that is still writing new chunks,
not yet indexed anywhere, when an update (or manual action) triggers a
reload of the proxy. The theory was that a GC run in the new daemon would
not know about the oldest writer in the old one, and would thus use a less
strict atime limit for chunk sweeping, opening up a window for deleting
chunks belonging to the long-running backup.

But this simply cannot happen, as we have a per-datastore, process-wide
flock which is acquired shared by backup jobs and exclusive by GC. Within
the same process, GC and backup can both hold it, as the lock has process
granularity. If an old daemon still has a writer, it also holds the lock
shared, so no GC in the new process can get exclusive access to it.

So, with that confirmed, we have no need for a "half-assed" verification
in the backup finish step.
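For context, the exclusion argument above boils down to flock(2)
semantics. A minimal sketch of the cross-process part - not the actual
locking code in our tree, names are made up for illustration:

    use std::fs::File;
    use std::os::unix::io::AsRawFd;

    /// Illustration only: backup writers lock the per-datastore lock file
    /// shared, GC locks it exclusive. A shared flock held by the old
    /// (pre-reload) daemon therefore blocks exclusive acquisition by a GC
    /// run in the newly started daemon.
    fn lock_datastore(lock_path: &str, exclusive: bool) -> std::io::Result<File> {
        let file = File::open(lock_path)?;
        let op = if exclusive {
            libc::LOCK_EX | libc::LOCK_NB // GC: rather fail than sweep while writers are active
        } else {
            libc::LOCK_SH | libc::LOCK_NB // backup writer
        };
        if unsafe { libc::flock(file.as_raw_fd(), op) } != 0 {
            return Err(std::io::Error::last_os_error());
        }
        Ok(file) // released once every fd referring to this open file is closed
    }

Within a single process the per-datastore lock is managed process-wide
(that is the "process granularity" mentioned above) instead of opening
the file twice, which is why backup and GC inside the same daemon do not
block each other.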
Rather, we plan to add an opt-in "full verify each backup on finish"
option (see #2988).

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
 src/api2/backup/environment.rs | 57 +++++-----------------------------
 1 file changed, 7 insertions(+), 50 deletions(-)

diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index d515bf30..b035f1e2 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -9,7 +9,7 @@ use proxmox::tools::digest_to_hex;
 use proxmox::tools::fs::{replace_file, CreateOptions};
 use proxmox::api::{RpcEnvironment, RpcEnvironmentType};
 
-use crate::api2::types::{Userid, SnapshotVerifyState, VerifyState};
+use crate::api2::types::Userid;
 use crate::backup::*;
 use crate::server::WorkerTask;
 use crate::server::formatter::*;
@@ -66,8 +66,8 @@ struct FixedWriterState {
     incremental: bool,
 }
 
-// key=digest, value=(length, existance checked)
-type KnownChunksMap = HashMap<[u8;32], (u32, bool)>;
+// key=digest, value=length
+type KnownChunksMap = HashMap<[u8;32], u32>;
 
 struct SharedBackupState {
     finished: bool,
@@ -156,7 +156,7 @@ impl BackupEnvironment {
 
         state.ensure_unfinished()?;
 
-        state.known_chunks.insert(digest, (length, false));
+        state.known_chunks.insert(digest, length);
 
         Ok(())
     }
@@ -198,7 +198,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, (size, true));
+        state.known_chunks.insert(digest, size);
 
         Ok(())
     }
@@ -231,7 +231,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, (size, true));
+        state.known_chunks.insert(digest, size);
 
         Ok(())
     }
@@ -240,7 +240,7 @@ impl BackupEnvironment {
         let state = self.state.lock().unwrap();
 
         match state.known_chunks.get(digest) {
-            Some((len, _)) => Some(*len),
+            Some(len) => Some(*len),
             None => None,
         }
     }
@@ -457,47 +457,6 @@ impl BackupEnvironment {
         Ok(())
     }
 
-    /// Ensure all chunks referenced in this backup actually exist.
-    /// Only call *after* all writers have been closed, to avoid race with GC.
-    /// In case of error, mark the previous backup as 'verify failed'.
-    fn verify_chunk_existance(&self, known_chunks: &KnownChunksMap) -> Result<(), Error> {
-        for (digest, (_, checked)) in known_chunks.iter() {
-            if !checked && !self.datastore.chunk_path(digest).0.exists() {
-                let mark_msg = if let Some(ref last_backup) = self.last_backup {
-                    let last_dir = &last_backup.backup_dir;
-                    let verify_state = SnapshotVerifyState {
-                        state: VerifyState::Failed,
-                        upid: self.worker.upid().clone(),
-                    };
-
-                    let res = proxmox::try_block!{
-                        let (mut manifest, _) = self.datastore.load_manifest(last_dir)?;
-                        manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
-                        self.datastore.store_manifest(last_dir, serde_json::to_value(manifest)?)
-                    };
-
-                    if let Err(err) = res {
-                        format!("tried marking previous snapshot as bad, \
-                            but got error accessing manifest: {}", err)
-                    } else {
-                        "marked previous snapshot as bad, please use \
-                        'verify' for a detailed check".to_owned()
-                    }
-                } else {
-                    "internal error: no base backup registered to mark invalid".to_owned()
-                };
-
-                bail!(
-                    "chunk '{}' was attempted to be reused but doesn't exist - {}",
-                    digest_to_hex(digest),
-                    mark_msg
-                );
-            }
-        }
-
-        Ok(())
-    }
-
     /// Mark backup as finished
     pub fn finish_backup(&self) -> Result<(), Error> {
         let mut state = self.state.lock().unwrap();
@@ -534,8 +493,6 @@ impl BackupEnvironment {
             }
         }
 
-        self.verify_chunk_existance(&state.known_chunks)?;
-
        // marks the backup as successful
         state.finished = true;
-- 
2.27.0
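
For readers skimming the diff: after this change the chunk bookkeeping is
a plain digest-to-length map. A minimal sketch of the resulting lookup,
illustrative only and not taken verbatim from the tree:

    use std::collections::HashMap;

    // key = chunk digest, value = chunk length; the "existence checked"
    // flag is gone together with verify_chunk_existance().
    type KnownChunksMap = HashMap<[u8; 32], u32>;

    fn lookup_chunk(known_chunks: &KnownChunksMap, digest: &[u8; 32]) -> Option<u32> {
        // equivalent to the match kept in the @@ -240 hunk above:
        // .copied() turns Option<&u32> into Option<u32>
        known_chunks.get(digest).copied()
    }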