* [pbs-devel] [PATCH proxmox-backup v2 2/2] use jobstate mechanism for verify/garbage_collection schedules
From: Dominik Csapak @ 2020-09-24 14:13 UTC
To: pbs-devel
also changes:
* correct comment about reset (replace 'sync' with 'action')
* check schedule changes correctly (only reset when the schedule actually changed)

with these changes, we can drop the 'lookup_last_worker' method
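For reference, a condensed sketch of the scheduler-side jobstate calls: the `try_start_job` helper, its `upid` parameter and the println output are illustrative only, schedule parsing and the WorkerTask body are left out, and only the jobstate/Job calls themselves are the ones used in the patch below.

use anyhow::Error;
use proxmox_backup::config::jobstate::{self, Job};

// condensed sketch only; `upid` stands in for worker.upid().to_string()
fn try_start_job(worker_type: &str, store: &str, upid: &str) -> Result<Job, Error> {
    // the last run time now comes from the job state file instead of
    // scanning the whole task list (this is what replaces lookup_last_worker)
    let last = jobstate::last_run_time(worker_type, store)?;
    println!("{} {}: last run at {}", worker_type, store, last);

    // Job::new also takes the per-job lock, so a job that is still running
    // simply fails to lock and gets skipped by the scheduler
    let mut job = Job::new(worker_type, store)?;
    job.start(upid)?;

    // when the worker finishes, the patch computes the state with
    // worker.create_state(&result) and hands it to job.finish()
    Ok(job)
}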
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
---
changes from v1:
* use try_block instead of closure
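To show why v2 switches to try_block: the macro wraps the body in an immediately invoked closure returning a Result, so errors raised with '?' are captured in a local result instead of leaving the worker closure, and job.finish() can still record the outcome afterwards. A minimal sketch, assuming the try_block! macro from the proxmox crate (the hunk below uses it without adding an import, so it appears to be in scope already); `do_verify` and `run_verify_job` are hypothetical stand-ins for verify_all_backups and the worker body:

use anyhow::{format_err, Error};
use proxmox::try_block;

// hypothetical stand-in for verify_all_backups; returns the failed snapshots
fn do_verify() -> Result<Vec<String>, Error> {
    Ok(Vec::new()) // pretend no snapshot failed
}

fn run_verify_job() -> Result<(), Error> {
    let result = try_block!({
        let failed_dirs = do_verify()?; // '?' only exits the try_block
        if !failed_dirs.is_empty() {
            Err(format_err!("verification failed - please check the log for details"))
        } else {
            Ok(())
        }
    });

    // the error is captured in `result`, so the job state can still be
    // recorded here (job.finish() in the patch) before it is propagated
    println!("job finished: {}", if result.is_ok() { "ok" } else { "failed" });
    result
}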
src/api2/config/datastore.rs | 33 +++++++++--
src/bin/proxmox-backup-proxy.rs | 98 +++++++++++++++++----------------
2 files changed, 78 insertions(+), 53 deletions(-)
diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
index 870324a3..298dd252 100644
--- a/src/api2/config/datastore.rs
+++ b/src/api2/config/datastore.rs
@@ -132,6 +132,8 @@ pub fn create_datastore(param: Value) -> Result<(), Error> {
datastore::save_config(&config)?;
crate::config::jobstate::create_state_file("prune", &datastore.name)?;
+ crate::config::jobstate::create_state_file("garbage_collection", &datastore.name)?;
+ crate::config::jobstate::create_state_file("verify", &datastore.name)?;
Ok(())
}
@@ -313,13 +315,23 @@ pub fn update_datastore(
}
}
- if gc_schedule.is_some() { data.gc_schedule = gc_schedule; }
+ let mut gc_schedule_changed = false;
+ if gc_schedule.is_some() {
+ gc_schedule_changed = data.gc_schedule != gc_schedule;
+ data.gc_schedule = gc_schedule;
+ }
+
let mut prune_schedule_changed = false;
if prune_schedule.is_some() {
- prune_schedule_changed = true;
+ prune_schedule_changed = data.prune_schedule != prune_schedule;
data.prune_schedule = prune_schedule;
}
- if verify_schedule.is_some() { data.verify_schedule = verify_schedule; }
+
+ let mut verify_schedule_changed = false;
+ if verify_schedule.is_some() {
+ verify_schedule_changed = data.verify_schedule != verify_schedule;
+ data.verify_schedule = verify_schedule;
+ }
if keep_last.is_some() { data.keep_last = keep_last; }
if keep_hourly.is_some() { data.keep_hourly = keep_hourly; }
@@ -332,12 +344,20 @@ pub fn update_datastore(
datastore::save_config(&config)?;
- // we want to reset the statefile, to avoid an immediate sync in some cases
+ // we want to reset the statefiles, to avoid an immediate action in some cases
// (e.g. going from monthly to weekly in the second week of the month)
+ if gc_schedule_changed {
+ crate::config::jobstate::create_state_file("garbage_collection", &name)?;
+ }
+
if prune_schedule_changed {
crate::config::jobstate::create_state_file("prune", &name)?;
}
+ if verify_schedule_changed {
+ crate::config::jobstate::create_state_file("verify", &name)?;
+ }
+
Ok(())
}
@@ -377,7 +397,10 @@ pub fn delete_datastore(name: String, digest: Option<String>) -> Result<(), Erro
datastore::save_config(&config)?;
- crate::config::jobstate::remove_state_file("prune", &name)?;
+ // ignore errors
+ let _ = crate::config::jobstate::remove_state_file("prune", &name);
+ let _ = crate::config::jobstate::remove_state_file("garbage_collection", &name);
+ let _ = crate::config::jobstate::remove_state_file("verify", &name);
Ok(())
}
diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
index b4759f58..3272fe72 100644
--- a/src/bin/proxmox-backup-proxy.rs
+++ b/src/bin/proxmox-backup-proxy.rs
@@ -202,40 +202,14 @@ async fn schedule_tasks() -> Result<(), Error> {
Ok(())
}
-fn lookup_last_worker(worker_type: &str, worker_id: &str) -> Result<Option<server::UPID>, Error> {
-
- let list = proxmox_backup::server::read_task_list()?;
-
- let mut last: Option<&server::UPID> = None;
-
- for entry in list.iter() {
- if entry.upid.worker_type == worker_type {
- if let Some(ref id) = entry.upid.worker_id {
- if id == worker_id {
- match last {
- Some(ref upid) => {
- if upid.starttime < entry.upid.starttime {
- last = Some(&entry.upid)
- }
- }
- None => {
- last = Some(&entry.upid)
- }
- }
- }
- }
- }
- }
-
- Ok(last.cloned())
-}
-
-
async fn schedule_datastore_garbage_collection() {
use proxmox_backup::backup::DataStore;
use proxmox_backup::server::{UPID, WorkerTask};
- use proxmox_backup::config::datastore::{self, DataStoreConfig};
+ use proxmox_backup::config::{
+ jobstate::{self, Job},
+ datastore::{self, DataStoreConfig}
+ };
use proxmox_backup::tools::systemd::time::{
parse_calendar_event, compute_next_event};
@@ -291,11 +265,10 @@ async fn schedule_datastore_garbage_collection() {
}
}
} else {
- match lookup_last_worker(worker_type, &store) {
- Ok(Some(upid)) => upid.starttime,
- Ok(None) => 0,
+ match jobstate::last_run_time(worker_type, &store) {
+ Ok(time) => time,
Err(err) => {
- eprintln!("lookup_last_job_start failed: {}", err);
+ eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
continue;
}
}
@@ -314,6 +287,11 @@ async fn schedule_datastore_garbage_collection() {
if next > now { continue; }
+ let mut job = match Job::new(worker_type, &store) {
+ Ok(job) => job,
+ Err(_) => continue, // could not get lock
+ };
+
let store2 = store.clone();
if let Err(err) = WorkerTask::new_thread(
@@ -322,9 +300,20 @@ async fn schedule_datastore_garbage_collection() {
Userid::backup_userid().clone(),
false,
move |worker| {
+ job.start(&worker.upid().to_string())?;
+
worker.log(format!("starting garbage collection on store {}", store));
worker.log(format!("task triggered by schedule '{}'", event_str));
- datastore.garbage_collection(&worker)
+
+ let result = datastore.garbage_collection(&worker);
+
+ let status = worker.create_state(&result);
+
+ if let Err(err) = job.finish(status) {
+ eprintln!("could not finish job state for {}: {}", worker_type, err);
+ }
+
+ result
}
) {
eprintln!("unable to start garbage collection on store {} - {}", store2, err);
@@ -482,7 +471,10 @@ async fn schedule_datastore_prune() {
async fn schedule_datastore_verification() {
use proxmox_backup::backup::{DataStore, verify_all_backups};
use proxmox_backup::server::{WorkerTask};
- use proxmox_backup::config::datastore::{self, DataStoreConfig};
+ use proxmox_backup::config::{
+ jobstate::{self, Job},
+ datastore::{self, DataStoreConfig}
+ };
use proxmox_backup::tools::systemd::time::{
parse_calendar_event, compute_next_event};
@@ -526,16 +518,10 @@ async fn schedule_datastore_verification() {
let worker_type = "verify";
- let last = match lookup_last_worker(worker_type, &store) {
- Ok(Some(upid)) => {
- if proxmox_backup::server::worker_is_active_local(&upid) {
- continue;
- }
- upid.starttime
- }
- Ok(None) => 0,
+ let last = match jobstate::last_run_time(worker_type, &store) {
+ Ok(time) => time,
Err(err) => {
- eprintln!("lookup_last_job_start failed: {}", err);
+ eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
continue;
}
};
@@ -553,6 +539,11 @@ async fn schedule_datastore_verification() {
if next > now { continue; }
+ let mut job = match Job::new(worker_type, &store) {
+ Ok(job) => job,
+ Err(_) => continue, // could not get lock
+ };
+
let worker_id = store.clone();
let store2 = store.clone();
if let Err(err) = WorkerTask::new_thread(
@@ -561,18 +552,29 @@ async fn schedule_datastore_verification() {
Userid::backup_userid().clone(),
false,
move |worker| {
+ job.start(&worker.upid().to_string())?;
worker.log(format!("starting verification on store {}", store2));
worker.log(format!("task triggered by schedule '{}'", event_str));
- if let Ok(failed_dirs) = verify_all_backups(datastore, worker.clone()) {
+ let result = try_block!({
+ let failed_dirs = verify_all_backups(datastore, worker.clone())?;
if failed_dirs.len() > 0 {
worker.log("Failed to verify following snapshots:");
for dir in failed_dirs {
worker.log(format!("\t{}", dir));
}
- bail!("verification failed - please check the log for details");
+ Err(format_err!("verification failed - please check the log for details"))
+ } else {
+ Ok(())
}
+ });
+
+ let status = worker.create_state(&result);
+
+ if let Err(err) = job.finish(status) {
+ eprintln!("could not finish job state for {}: {}", worker_type, err);
}
- Ok(())
+
+ result
},
) {
eprintln!("unable to start verification on store {} - {}", store, err);
--
2.20.1