From mboxrd@z Thu Jan 1 00:00:00 1970
From: Dominik Csapak <d.csapak@proxmox.com>
To: pbs-devel@lists.proxmox.com
Date: Thu, 24 Sep 2020 16:13:54 +0200
Message-Id: <20200924141354.14547-2-d.csapak@proxmox.com>
X-Mailer: git-send-email 2.20.1
In-Reply-To: <20200924141354.14547-1-d.csapak@proxmox.com>
References: <20200924141354.14547-1-d.csapak@proxmox.com>
Subject: [pbs-devel] [PATCH proxmox-backup v2 2/2] use jobstate mechanism for verify/garbage_collection schedules

also changes:
* correct comment about reset (replace 'sync' with 'action')
* check schedule change correctly (only when it is actually changed)

with these changes, we can drop the 'lookup_last_worker' method

Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* use try_block instead of closure
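For reviewers: all three schedulers (prune since the previous patch in this
series, garbage collection and verify with this one) now share the same job
state pattern. A condensed sketch, stitched together from the hunks below
and using only calls this patch touches (the scheduler loop, worker setup
and most logging are elided):

    // read the last run time from the job state file instead of
    // scanning the whole task list (this is what lets us drop
    // lookup_last_worker)
    let last = match jobstate::last_run_time(worker_type, &store) {
        Ok(time) => time,
        Err(err) => {
            eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
            continue;
        }
    };

    // creating the Job acquires the per-job lock, so the same job
    // cannot be started twice concurrently
    let mut job = match Job::new(worker_type, &store) {
        Ok(job) => job,
        Err(_) => continue, // could not get lock
    };

    // inside the worker closure: record the start, run the task,
    // then persist the result in the state file
    job.start(&worker.upid().to_string())?;
    let result = datastore.garbage_collection(&worker);
    let status = worker.create_state(&result);
    if let Err(err) = job.finish(status) {
        eprintln!("could not finish job state for {}: {}", worker_type, err);
    }

Taking the lock before spawning the worker is also what replaces the old
worker_is_active_local() check in the verify scheduler.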
 src/api2/config/datastore.rs    | 33 +++++++++--
 src/bin/proxmox-backup-proxy.rs | 98 +++++++++++++++++----------------
 2 files changed, 78 insertions(+), 53 deletions(-)

diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
index 870324a3..298dd252 100644
--- a/src/api2/config/datastore.rs
+++ b/src/api2/config/datastore.rs
@@ -132,6 +132,8 @@ pub fn create_datastore(param: Value) -> Result<(), Error> {
     datastore::save_config(&config)?;
 
     crate::config::jobstate::create_state_file("prune", &datastore.name)?;
+    crate::config::jobstate::create_state_file("garbage_collection", &datastore.name)?;
+    crate::config::jobstate::create_state_file("verify", &datastore.name)?;
 
     Ok(())
 }
@@ -313,13 +315,23 @@ pub fn update_datastore(
         }
     }
 
-    if gc_schedule.is_some() { data.gc_schedule = gc_schedule; }
+    let mut gc_schedule_changed = false;
+    if gc_schedule.is_some() {
+        gc_schedule_changed = data.gc_schedule != gc_schedule;
+        data.gc_schedule = gc_schedule;
+    }
+
     let mut prune_schedule_changed = false;
     if prune_schedule.is_some() {
-        prune_schedule_changed = true;
+        prune_schedule_changed = data.prune_schedule != prune_schedule;
         data.prune_schedule = prune_schedule;
     }
-    if verify_schedule.is_some() { data.verify_schedule = verify_schedule; }
+
+    let mut verify_schedule_changed = false;
+    if verify_schedule.is_some() {
+        verify_schedule_changed = data.verify_schedule != verify_schedule;
+        data.verify_schedule = verify_schedule;
+    }
 
     if keep_last.is_some() { data.keep_last = keep_last; }
     if keep_hourly.is_some() { data.keep_hourly = keep_hourly; }
@@ -332,12 +344,20 @@ pub fn update_datastore(
 
     datastore::save_config(&config)?;
 
-    // we want to reset the statefile, to avoid an immediate sync in some cases
+    // we want to reset the statefiles, to avoid an immediate action in some cases
     // (e.g. going from monthly to weekly in the second week of the month)
+    if gc_schedule_changed {
+        crate::config::jobstate::create_state_file("garbage_collection", &name)?;
+    }
+
     if prune_schedule_changed {
         crate::config::jobstate::create_state_file("prune", &name)?;
     }
+
+    if verify_schedule_changed {
+        crate::config::jobstate::create_state_file("verify", &name)?;
+    }
 
     Ok(())
 }
@@ -377,7 +397,10 @@ pub fn delete_datastore(name: String, digest: Option<String>) -> Result<(), Error> {
 
     datastore::save_config(&config)?;
 
-    crate::config::jobstate::remove_state_file("prune", &name)?;
+    // ignore errors
+    let _ = crate::config::jobstate::remove_state_file("prune", &name);
+    let _ = crate::config::jobstate::remove_state_file("garbage_collection", &name);
+    let _ = crate::config::jobstate::remove_state_file("verify", &name);
 
     Ok(())
 }
diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
index b4759f58..3272fe72 100644
--- a/src/bin/proxmox-backup-proxy.rs
+++ b/src/bin/proxmox-backup-proxy.rs
@@ -202,40 +202,14 @@ async fn schedule_tasks() -> Result<(), Error> {
     Ok(())
 }
 
-fn lookup_last_worker(worker_type: &str, worker_id: &str) -> Result<Option<server::UPID>, Error> {
-
-    let list = proxmox_backup::server::read_task_list()?;
-
-    let mut last: Option<&server::UPID> = None;
-
-    for entry in list.iter() {
-        if entry.upid.worker_type == worker_type {
-            if let Some(ref id) = entry.upid.worker_id {
-                if id == worker_id {
-                    match last {
-                        Some(ref upid) => {
-                            if upid.starttime < entry.upid.starttime {
-                                last = Some(&entry.upid)
-                            }
-                        }
-                        None => {
-                            last = Some(&entry.upid)
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    Ok(last.cloned())
-}
-
-
 async fn schedule_datastore_garbage_collection() {
 
     use proxmox_backup::backup::DataStore;
     use proxmox_backup::server::{UPID, WorkerTask};
-    use proxmox_backup::config::datastore::{self, DataStoreConfig};
+    use proxmox_backup::config::{
+        jobstate::{self, Job},
+        datastore::{self, DataStoreConfig}
+    };
     use proxmox_backup::tools::systemd::time::{
         parse_calendar_event, compute_next_event};
 
@@ -291,11 +265,10 @@ async fn schedule_datastore_garbage_collection() {
                 }
             }
         } else {
-            match lookup_last_worker(worker_type, &store) {
-                Ok(Some(upid)) => upid.starttime,
-                Ok(None) => 0,
+            match jobstate::last_run_time(worker_type, &store) {
+                Ok(time) => time,
                 Err(err) => {
-                    eprintln!("lookup_last_job_start failed: {}", err);
+                    eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
                     continue;
                 }
             }
@@ -314,6 +287,11 @@ async fn schedule_datastore_garbage_collection() {
 
         if next > now { continue; }
 
+        let mut job = match Job::new(worker_type, &store) {
+            Ok(job) => job,
+            Err(_) => continue, // could not get lock
+        };
+
         let store2 = store.clone();
 
         if let Err(err) = WorkerTask::new_thread(
@@ -322,9 +300,20 @@ async fn schedule_datastore_garbage_collection() {
             Userid::backup_userid().clone(),
             false,
             move |worker| {
+                job.start(&worker.upid().to_string())?;
+
                 worker.log(format!("starting garbage collection on store {}", store));
                 worker.log(format!("task triggered by schedule '{}'", event_str));
-                datastore.garbage_collection(&worker)
+
+                let result = datastore.garbage_collection(&worker);
+
+                let status = worker.create_state(&result);
+
+                if let Err(err) = job.finish(status) {
+                    eprintln!("could not finish job state for {}: {}", worker_type, err);
+                }
+
+                result
             }
         ) {
             eprintln!("unable to start garbage collection on store {} - {}", store2, err);
@@ -482,7 +471,10 @@ async fn schedule_datastore_prune() {
 async fn schedule_datastore_verification() {
 
     use proxmox_backup::backup::{DataStore, verify_all_backups};
     use proxmox_backup::server::{WorkerTask};
-    use proxmox_backup::config::datastore::{self, DataStoreConfig};
+    use proxmox_backup::config::{
+        jobstate::{self, Job},
+        datastore::{self, DataStoreConfig}
+    };
     use proxmox_backup::tools::systemd::time::{
         parse_calendar_event, compute_next_event};
@@ -526,16 +518,10 @@ async fn schedule_datastore_verification() {
 
         let worker_type = "verify";
 
-        let last = match lookup_last_worker(worker_type, &store) {
-            Ok(Some(upid)) => {
-                if proxmox_backup::server::worker_is_active_local(&upid) {
-                    continue;
-                }
-                upid.starttime
-            }
-            Ok(None) => 0,
+        let last = match jobstate::last_run_time(worker_type, &store) {
+            Ok(time) => time,
             Err(err) => {
-                eprintln!("lookup_last_job_start failed: {}", err);
+                eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
                 continue;
             }
         };
@@ -553,6 +539,11 @@ async fn schedule_datastore_verification() {
 
         if next > now { continue; }
 
+        let mut job = match Job::new(worker_type, &store) {
+            Ok(job) => job,
+            Err(_) => continue, // could not get lock
+        };
+
         let worker_id = store.clone();
         let store2 = store.clone();
         if let Err(err) = WorkerTask::new_thread(
@@ -561,18 +552,29 @@ async fn schedule_datastore_verification() {
             Userid::backup_userid().clone(),
             false,
             move |worker| {
+                job.start(&worker.upid().to_string())?;
                 worker.log(format!("starting verification on store {}", store2));
                 worker.log(format!("task triggered by schedule '{}'", event_str));
-                if let Ok(failed_dirs) = verify_all_backups(datastore, worker.clone()) {
+                let result = try_block!({
+                    let failed_dirs = verify_all_backups(datastore, worker.clone())?;
                     if failed_dirs.len() > 0 {
                         worker.log("Failed to verify following snapshots:");
                         for dir in failed_dirs {
                             worker.log(format!("\t{}", dir));
                         }
-                        bail!("verification failed - please check the log for details");
+                        Err(format_err!("verification failed - please check the log for details"))
+                    } else {
+                        Ok(())
                     }
+                });
+
+                let status = worker.create_state(&result);
+
+                if let Err(err) = job.finish(status) {
+                    eprintln!("could not finish job state for {}: {}", worker_type, err);
                 }
-                Ok(())
+
+                result
             },
         ) {
            eprintln!("unable to start verification on store {} - {}", store, err);
-- 
2.20.1