From mboxrd@z Thu Jan 1 00:00:00 1970
From: Dominik Csapak
To: pbs-devel@lists.proxmox.com
Date: Thu, 1 Oct 2020 12:40:12 +0200
Message-ID: <01c8a457-1ce9-da57-5cfc-d7085cc82c08@proxmox.com>
In-Reply-To: <20200925084330.75484-5-h.laimer@proxmox.com>
References: <20200925084330.75484-1-h.laimer@proxmox.com> <20200925084330.75484-5-h.laimer@proxmox.com>
Subject: Re: [pbs-devel] [PATCH v1 proxmox-backup 04/14] add do_verification_job function to verify.rs

high-level: I am not sure I would put that code here. It makes the verify
code even more intertwined with the worker tasks, which is not optimal
(the verify code should have no dependency on a worker task, IMHO).

some comments inline

On 9/25/20 10:43 AM, Hannes Laimer wrote:
> Signed-off-by: Hannes Laimer
> ---
>  src/backup/verify.rs | 91 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 90 insertions(+), 1 deletion(-)
>
> diff --git a/src/backup/verify.rs b/src/backup/verify.rs
> index 1fad6187..1f54ebeb 100644
> --- a/src/backup/verify.rs
> +++ b/src/backup/verify.rs
> @@ -5,8 +5,10 @@ use std::time::Instant;
>
>  use anyhow::{bail, format_err, Error};
>
> -use crate::server::WorkerTask;
> +use crate::server::{WorkerTask, TaskState};
>  use crate::api2::types::*;
> +use crate::config::jobstate::Job;
> +use crate::config::verify::VerifyJobConfig;
>
>  use super::{
>      DataStore, DataBlob, BackupGroup, BackupDir, BackupInfo, IndexFile,
> @@ -432,3 +434,90 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) ->
>
>      Ok(errors)
>  }
> +
> +/// Runs a verification job.
> +pub fn do_verification_job(
> +    mut job: Job,
> +    verify_job: VerifyJobConfig,
> +    userid: &Userid,
> +    schedule: Option<String>,
> +) -> Result<String, Error> {
> +    let datastore = DataStore::lookup_datastore(&verify_job.store)?;
> +
> +    let mut backups_to_verify = BackupInfo::list_backups(&datastore.base_path())?;
> +
> +    if verify_job.ignore_verified {
> +        backups_to_verify.retain(|backup_info| {
> +            if let Ok((manifest, _)) = datastore.load_manifest(&backup_info.backup_dir) {
> +                let verify = manifest.unprotected["verify_state"].clone();
> +                if let Ok(verify) = serde_json::from_value::<SnapshotVerifyState>(verify) {
> +                    let days_since_last_verify =
> +                        (proxmox::tools::time::epoch_i64() - verify.upid.starttime) / 86400;
> +                    // if last verification failed we have to verify again since it might be fixed OR
> +                    // if outdated_after is None, verifications do not become outdated
> +                    verify.state == VerifyState::Failed || (verify_job.outdated_after.is_some()
> +                        && days_since_last_verify > verify_job.outdated_after.unwrap())

after talking with Thomas a few days ago: it may not be the best way to try
to re-verify failed backups, so maybe we can simply drop this and only
verify 'non-verified' backups
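just to illustrate (untested, rough sketch only; it assumes manifest.unprotected
is a serde_json::Value, as the from_value() call above suggests), the retain
closure could then shrink to something like:

    backups_to_verify.retain(|backup_info| {
        match datastore.load_manifest(&backup_info.backup_dir) {
            // keep a snapshot only if it has no verify_state yet,
            // i.e. it was never verified
            Ok((manifest, _)) => manifest.unprotected["verify_state"].is_null(),
            // manifest could not be loaded, do not verify in that case
            Err(_) => false,
        }
    });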
> +                } else { true } // was never verified, therefore we always want to verify
> +            } else { false } // manifest could not be loaded, do not verify in that case
> +        })
> +    }
> +
> +    let job_id = job.jobname().to_string();
> +    let worker_type = job.jobtype().to_string();
> +
> +    let upid_str = WorkerTask::new_thread(
> +        &worker_type,
> +        Some(job.jobname().to_string()),
> +        userid.clone(),
> +        false,
> +        move |worker| {
> +            job.start(&worker.upid().to_string())?;
> +
> +            let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 16)));
> +            let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
> +
> +            worker.log(format!("Starting datastore verify job '{}'", job_id));
> +            if let Some(event_str) = schedule {
> +                worker.log(format!("task triggered by schedule '{}'", event_str));
> +            }
> +
> +            let mut failed_dirs: Vec<String> = Vec::new();
> +            for backup_info in backups_to_verify {
> +                match verify_backup_dir(
> +                    datastore.clone(),
> +                    &backup_info.backup_dir,
> +                    verified_chunks.clone(),
> +                    corrupt_chunks.clone(),
> +                    worker.clone(),
> +                ) {
> +                    Ok(false) => failed_dirs.push(backup_info.backup_dir.to_string()),
> +                    Err(err) => {
> +                        let endtime = proxmox::tools::time::epoch_i64();
> +                        job.finish(TaskState::Error {
> +                            message: err.to_string(),
> +                            endtime
> +                        })?;
> +                        bail!(err.to_string());

here you can use our 'try_block' macro, I would do it like so:

    let result = proxmox::try_block!({
        // basically the whole 'real' worker code
        // this can use '?'/bail! to bubble up errors
    });

    // here goes only one regular job.finish call

(a slightly more complete sketch is at the end of this mail)

> +                    },
> +                    _ => {}
> +                }
> +            }
> +            if !failed_dirs.is_empty() {
> +                worker.log("Failed to verify following snapshots:");
> +                for dir in failed_dirs {
> +                    worker.log(format!("\t{}", dir));
> +                }
> +                let endtime = proxmox::tools::time::epoch_i64();
> +                job.finish(TaskState::Error {
> +                    message: String::from("verification failed - please check the log for details"),
> +                    endtime
> +                })?;
> +                bail!("verification failed - please check the log for details");
> +            }
> +            let endtime = proxmox::tools::time::epoch_i64();
> +            job.finish(TaskState::OK { endtime })?;
> +            Ok(())
> +        })?;
> +
> +    Ok(upid_str)
> +}
>
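to flesh out the try_block suggestion from above a bit (again untested and only
a rough sketch; it reuses the TaskState variants from the patch and assumes
try_block! simply hands back the block's Result), the worker closure could look
roughly like this:

    move |worker| {
        job.start(&worker.upid().to_string())?;

        let result = proxmox::try_block!({
            // ... the whole verification loop from the patch goes here and
            //     can freely use `?`/bail! to bubble errors up ...
            Ok(())
        });

        // a single job.finish() call derived from the overall result,
        // instead of the three separate ones in the patch
        let endtime = proxmox::tools::time::epoch_i64();
        match &result {
            Ok(()) => job.finish(TaskState::OK { endtime })?,
            Err(err) => job.finish(TaskState::Error {
                message: err.to_string(),
                endtime,
            })?,
        }

        result
    }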