From mboxrd@z Thu Jan 1 00:00:00 1970
From: Stefan Reiter <s.reiter@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Tue, 8 Sep 2020 15:29:44 +0200
Message-Id: <20200908132944.5876-3-s.reiter@proxmox.com>
X-Mailer: git-send-email 2.20.1
In-Reply-To: <20200908132944.5876-1-s.reiter@proxmox.com>
References: <20200908132944.5876-1-s.reiter@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [pbs-devel] [PATCH v3 proxmox-backup 3/3] backup: check all referenced chunks actually exist
List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com>

A client can omit uploading chunks in the "known_chunks" list; those then
also won't be written on the server side. Check all chunks mentioned in the
index but not uploaded for existence, and report an error if they don't
exist, instead of marking a potentially broken backup as "successful".

This is only important if the base snapshot references corrupted chunks but
has not been negatively verified. Also, it is important to only verify this
at the end, *after* all index writers are closed, since only then can it be
guaranteed that no GC will sweep referenced chunks away.

If a chunk is found missing, also mark the previous backup with a
verification failure, since we know the missing chunk has to be referenced
in it (that is the only way it could have been inserted into known_chunks
with checked=false). This has the benefit of automatically doing a
full-upload backup if the user attempts to retry after seeing the new
error, instead of requiring a manual verify or forget.
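To illustrate the mechanism separately from the diff below: a minimal,
self-contained Rust sketch of the bookkeeping this introduces. The
ChunkStore type, its flat one-file-per-chunk layout, and the error type are
hypothetical stand-ins, not the proxmox-backup API.

```rust
use std::collections::HashMap;
use std::path::PathBuf;

// digest -> (chunk length, existence already checked).
// Chunks actually uploaded in this session are inserted with `true`;
// chunks the client only *referenced* (known from the base snapshot)
// start out as `false` and must be checked before finishing.
type KnownChunksMap = HashMap<[u8; 32], (u32, bool)>;

struct ChunkStore {
    base: PathBuf, // hypothetical flat store, one file per chunk
}

impl ChunkStore {
    fn chunk_path(&self, digest: &[u8; 32]) -> PathBuf {
        let hex: String = digest.iter().map(|b| format!("{:02x}", b)).collect();
        self.base.join(hex)
    }

    // Call only after all index writers are closed, so GC cannot race us.
    fn verify_chunk_existence(&self, known_chunks: &KnownChunksMap) -> Result<(), String> {
        for (digest, (_len, checked)) in known_chunks {
            if !checked && !self.chunk_path(digest).exists() {
                return Err(format!(
                    "chunk '{}' is referenced by the index but missing on disk",
                    self.chunk_path(digest).display()
                ));
            }
        }
        Ok(())
    }
}

fn main() {
    let store = ChunkStore { base: PathBuf::from("/tmp/chunks") };
    let mut known = KnownChunksMap::new();
    known.insert([0u8; 32], (4096, false)); // referenced but never uploaded
    // With an empty store, the backup is reported broken instead of
    // finishing "successfully".
    assert!(store.verify_chunk_existence(&known).is_err());
}
```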
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
---

v3:
* reuse known_chunks map instead of a new HashSet
* create named type for known_chunks
* refactor check into helper function
* mark previous backup with 'bad' verify state to make the next backup succeed

@Dietmar: this patch is useful since the last snapshot (base) might be
corrupted, but we don't know, since it might not have been verified. Of
course, actually corrupted chunks are still not detected (expensive), but at
least missing ones are.

v2 here: https://lists.proxmox.com/pipermail/pbs-devel/2020-September/000572.html

 src/api2/backup/environment.rs | 58 ++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index f635c6f7..22b96c22 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -9,7 +9,7 @@ use proxmox::tools::digest_to_hex;
 use proxmox::tools::fs::{replace_file, CreateOptions};
 use proxmox::api::{RpcEnvironment, RpcEnvironmentType};
 
-use crate::api2::types::Userid;
+use crate::api2::types::{Userid, SnapshotVerifyState};
 use crate::backup::*;
 use crate::server::WorkerTask;
 use crate::server::formatter::*;
@@ -66,13 +66,16 @@ struct FixedWriterState {
     incremental: bool,
 }
 
+// key=digest, value=(length, existence checked)
+type KnownChunksMap = HashMap<[u8;32], (u32, bool)>;
+
 struct SharedBackupState {
     finished: bool,
     uid_counter: usize,
     file_counter: usize, // successfully uploaded files
     dynamic_writers: HashMap<usize, DynamicWriterState>,
     fixed_writers: HashMap<usize, FixedWriterState>,
-    known_chunks: HashMap<[u8;32], u32>,
+    known_chunks: KnownChunksMap,
     backup_size: u64, // sums up size of all files
     backup_stat: UploadStatistic,
 }
@@ -153,7 +156,7 @@ impl BackupEnvironment {
 
         state.ensure_unfinished()?;
 
-        state.known_chunks.insert(digest, length);
+        state.known_chunks.insert(digest, (length, false));
 
         Ok(())
     }
@@ -195,7 +198,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, size);
+        state.known_chunks.insert(digest, (size, true));
 
         Ok(())
     }
@@ -228,7 +231,7 @@ impl BackupEnvironment {
         if is_duplicate { data.upload_stat.duplicates += 1; }
 
         // register chunk
-        state.known_chunks.insert(digest, size);
+        state.known_chunks.insert(digest, (size, true));
 
         Ok(())
     }
@@ -237,7 +240,7 @@ impl BackupEnvironment {
         let state = self.state.lock().unwrap();
 
         match state.known_chunks.get(digest) {
-            Some(len) => Some(*len),
+            Some((len, _)) => Some(*len),
             None => None,
         }
     }
@@ -454,6 +457,47 @@ impl BackupEnvironment {
         Ok(())
     }
 
+    /// Ensure all chunks referenced in this backup actually exist.
+    /// Only call *after* all writers have been closed, to avoid race with GC.
+    /// In case of error, mark the previous backup as 'verify failed'.
+    fn verify_chunk_existance(&self, known_chunks: &KnownChunksMap) -> Result<(), Error> {
+        for (digest, (_, checked)) in known_chunks.iter() {
+            if !checked && !self.datastore.chunk_path(digest).0.exists() {
+                let mark_msg = if let Some(ref last_backup) = self.last_backup {
+                    let last_dir = &last_backup.backup_dir;
+                    let verify_state = SnapshotVerifyState {
+                        state: "failed".to_owned(),
+                        upid: self.worker.upid().clone(),
+                    };
+
+                    let res = proxmox::try_block!{
+                        let (mut manifest, _) = self.datastore.load_manifest(last_dir)?;
+                        manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
+                        self.datastore.store_manifest(last_dir, serde_json::to_value(manifest)?)
+                    };
+
+                    if let Err(err) = res {
+                        format!("tried marking previous snapshot as bad, \
+                            but got error accessing manifest: {}", err)
+                    } else {
+                        "marked previous snapshot as bad, please use \
+                            'verify' for a detailed check".to_owned()
+                    }
+                } else {
+                    "internal error: no base backup registered to mark invalid".to_owned()
+                };
+
+                bail!(
+                    "chunk '{}' was attempted to be reused but doesn't exist - {}",
+                    digest_to_hex(digest),
+                    mark_msg
+                );
+            }
+        }
+
+        Ok(())
+    }
+
     /// Mark backup as finished
     pub fn finish_backup(&self) -> Result<(), Error> {
         let mut state = self.state.lock().unwrap();
@@ -490,6 +534,8 @@
             }
         }
 
+        self.verify_chunk_existance(&state.known_chunks)?;
+
         // marks the backup as successful
        state.finished = true;
 
-- 
2.20.1
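For reference, the shape of the verify_state marker written into the
previous snapshot's manifest. This sketch uses plain serde_json values
instead of the real BackupManifest/SnapshotVerifyState types, and the UPID
string is a placeholder.

```rust
use serde_json::{json, Value};

// Hypothetical stand-in: the real code loads a BackupManifest, sets
// manifest.unprotected["verify_state"], and stores it back via the
// datastore; here the manifest is modeled as a plain JSON value.
fn mark_previous_snapshot_bad(manifest: &mut Value, upid: &str) {
    manifest["unprotected"]["verify_state"] = json!({
        "state": "failed",
        "upid": upid,
    });
}

fn main() {
    let mut manifest = json!({ "unprotected": {} });
    mark_previous_snapshot_bad(&mut manifest, "UPID:placeholder");
    // Per the commit message, a retried backup that sees this marker on its
    // base snapshot falls back to a full upload instead of reusing chunks.
    println!("{}", serde_json::to_string_pretty(&manifest).unwrap());
}
```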