* [PATCH proxmox-backup v1 1/2] fix #7400: api: gracefully handle corrupted job statefiles
2026-03-17 16:07 [PATCH proxmox-backup v1 0/2] fix #7400: improve handling of corrupted job statefiles Michael Köppl
@ 2026-03-17 16:07 ` Michael Köppl
2026-03-17 16:07 ` [PATCH proxmox-backup v1 2/2] fix #7400: proxy: self-heal " Michael Köppl
1 sibling, 0 replies; 3+ messages in thread
From: Michael Köppl @ 2026-03-17 16:07 UTC (permalink / raw)
To: pbs-devel
Previously, if a job statefile was empty or corrupted (e.g. due to an
I/O error or an abrupt shutdown), the JobState::load method would
return an error that was propagated up, causing the endpoint to
return an error to the user. As a result, users would not see any of
their jobs if a single job had a corrupted statefile.
Instead, handle the error explicitly, logging an error and returning a
default JobScheduleStatus, such that job lists can still be fetched,
displaying the affected job as configured but simply missing its last
run status.
Signed-off-by: Michael Köppl <m.koeppl@proxmox.com>
---
src/api2/admin/prune.rs | 17 ++++++++++-------
src/api2/admin/sync.rs | 16 ++++++++++------
src/api2/admin/verify.rs | 17 ++++++++++-------
src/api2/tape/backup.rs | 17 ++++++++++-------
4 files changed, 40 insertions(+), 27 deletions(-)
diff --git a/src/api2/admin/prune.rs b/src/api2/admin/prune.rs
index a5ebf2975..f2a6445c2 100644
--- a/src/api2/admin/prune.rs
+++ b/src/api2/admin/prune.rs
@@ -1,6 +1,6 @@
//! Datastore Prune Job Management
-use anyhow::{format_err, Error};
+use anyhow::Error;
use serde_json::Value;
use proxmox_router::{
@@ -10,8 +10,8 @@ use proxmox_schema::api;
use proxmox_sortable_macro::sortable;
use pbs_api_types::{
- Authid, PruneJobConfig, PruneJobStatus, DATASTORE_SCHEMA, JOB_ID_SCHEMA, PRIV_DATASTORE_AUDIT,
- PRIV_DATASTORE_MODIFY,
+ Authid, JobScheduleStatus, PruneJobConfig, PruneJobStatus, DATASTORE_SCHEMA, JOB_ID_SCHEMA,
+ PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_MODIFY,
};
use pbs_config::prune;
use pbs_config::CachedUserInfo;
@@ -73,10 +73,13 @@ pub fn list_prune_jobs(
let mut list = Vec::new();
for job in job_config_iter {
- let last_state = JobState::load("prunejob", &job.id)
- .map_err(|err| format_err!("could not open statefile for {}: {}", &job.id, err))?;
-
- let mut status = compute_schedule_status(&last_state, Some(&job.schedule))?;
+ let mut status = match JobState::load("prunejob", &job.id) {
+ Ok(last_state) => compute_schedule_status(&last_state, Some(&job.schedule))?,
+ Err(err) => {
+ log::error!("could not open statefile for {}: {}", &job.id, err);
+ JobScheduleStatus::default()
+ }
+ };
if job.disable {
status.next_run = None;
}
diff --git a/src/api2/admin/sync.rs b/src/api2/admin/sync.rs
index 6722ebea0..b0ff53ef7 100644
--- a/src/api2/admin/sync.rs
+++ b/src/api2/admin/sync.rs
@@ -1,6 +1,6 @@
//! Datastore Synchronization Job Management
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, Error};
use serde::{Deserialize, Serialize};
use serde_json::Value;
@@ -12,7 +12,8 @@ use proxmox_schema::api;
use proxmox_sortable_macro::sortable;
use pbs_api_types::{
- Authid, SyncDirection, SyncJobConfig, SyncJobStatus, DATASTORE_SCHEMA, JOB_ID_SCHEMA,
+ Authid, JobScheduleStatus, SyncDirection, SyncJobConfig, SyncJobStatus, DATASTORE_SCHEMA,
+ JOB_ID_SCHEMA,
};
use pbs_config::sync;
use pbs_config::CachedUserInfo;
@@ -112,10 +113,13 @@ pub fn list_config_sync_jobs(
continue;
}
- let last_state = JobState::load("syncjob", &job.id)
- .map_err(|err| format_err!("could not open statefile for {}: {}", &job.id, err))?;
-
- let status = compute_schedule_status(&last_state, job.schedule.as_deref())?;
+ let status = match JobState::load("syncjob", &job.id) {
+ Ok(last_state) => compute_schedule_status(&last_state, job.schedule.as_deref())?,
+ Err(err) => {
+ log::error!("could not open statefile for {}: {}", &job.id, err);
+ JobScheduleStatus::default()
+ }
+ };
list.push(SyncJobStatus {
config: job,
diff --git a/src/api2/admin/verify.rs b/src/api2/admin/verify.rs
index 66695236c..d2c47ce0c 100644
--- a/src/api2/admin/verify.rs
+++ b/src/api2/admin/verify.rs
@@ -1,6 +1,6 @@
//! Datastore Verify Job Management
-use anyhow::{format_err, Error};
+use anyhow::Error;
use serde_json::Value;
use proxmox_router::{
@@ -11,8 +11,8 @@ use proxmox_schema::api;
use proxmox_sortable_macro::sortable;
use pbs_api_types::{
- Authid, VerificationJobConfig, VerificationJobStatus, DATASTORE_SCHEMA, JOB_ID_SCHEMA,
- PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_VERIFY,
+ Authid, JobScheduleStatus, VerificationJobConfig, VerificationJobStatus, DATASTORE_SCHEMA,
+ JOB_ID_SCHEMA, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_VERIFY,
};
use pbs_config::verify;
use pbs_config::CachedUserInfo;
@@ -73,10 +73,13 @@ pub fn list_verification_jobs(
let mut list = Vec::new();
for job in job_config_iter {
- let last_state = JobState::load("verificationjob", &job.id)
- .map_err(|err| format_err!("could not open statefile for {}: {}", &job.id, err))?;
-
- let status = compute_schedule_status(&last_state, job.schedule.as_deref())?;
+ let status = match JobState::load("verificationjob", &job.id) {
+ Ok(last_state) => compute_schedule_status(&last_state, job.schedule.as_deref())?,
+ Err(err) => {
+ log::error!("could not open statefile for {}: {}", &job.id, err);
+ JobScheduleStatus::default()
+ }
+ };
list.push(VerificationJobStatus {
config: job,
diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 47e8d0209..dde65735d 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -1,6 +1,6 @@
use std::sync::{Arc, Mutex};
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, Error};
use serde_json::Value;
use tracing::{info, warn};
@@ -11,8 +11,8 @@ use proxmox_schema::api;
use proxmox_worker_task::WorkerTaskContext;
use pbs_api_types::{
- print_ns_and_snapshot, print_store_and_ns, Authid, MediaPoolConfig, Operation,
- TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, JOB_ID_SCHEMA,
+ print_ns_and_snapshot, print_store_and_ns, Authid, JobScheduleStatus, MediaPoolConfig,
+ Operation, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, JOB_ID_SCHEMA,
PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
};
@@ -97,10 +97,13 @@ pub fn list_tape_backup_jobs(
continue;
}
- let last_state = JobState::load("tape-backup-job", &job.id)
- .map_err(|err| format_err!("could not open statefile for {}: {}", &job.id, err))?;
-
- let status = compute_schedule_status(&last_state, job.schedule.as_deref())?;
+ let status = match JobState::load("tape-backup-job", &job.id) {
+ Ok(last_state) => compute_schedule_status(&last_state, job.schedule.as_deref())?,
+ Err(err) => {
+ log::error!("could not open statefile for {}: {}", &job.id, err);
+ JobScheduleStatus::default()
+ }
+ };
let next_run = status.next_run.unwrap_or(current_time);
--
2.47.3
^ permalink raw reply [flat|nested] 3+ messages in thread