From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id A86B5BA12B for ; Wed, 13 Dec 2023 16:38:30 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 9169FC858 for ; Wed, 13 Dec 2023 16:38:30 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Wed, 13 Dec 2023 16:38:29 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 230DC472BE for ; Wed, 13 Dec 2023 16:38:29 +0100 (CET) From: Christian Ebner To: pbs-devel@lists.proxmox.com Date: Wed, 13 Dec 2023 16:38:16 +0100 Message-Id: <20231213153819.391392-6-c.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20231213153819.391392-1-c.ebner@proxmox.com> References: <20231213153819.391392-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL 0.056 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - Subject: [pbs-devel] [RFC proxmox-backup 5/8] server: implement sanity check job X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: 
Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Dec 2023 15:38:30 -0000 Adds the sanity check job execution logic and implements a check for the datastore usage levels exceeding the config values threshold level. Signed-off-by: Christian Ebner --- src/server/mod.rs | 3 + src/server/sanity_check_job.rs | 131 +++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 src/server/sanity_check_job.rs diff --git a/src/server/mod.rs b/src/server/mod.rs index 4e3b68ac..b3fdc281 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -25,6 +25,9 @@ pub use gc_job::*; mod realm_sync_job; pub use realm_sync_job::*; +mod sanity_check_job; +pub use sanity_check_job::*; + mod email_notifications; pub use email_notifications::*; diff --git a/src/server/sanity_check_job.rs b/src/server/sanity_check_job.rs new file mode 100644 index 00000000..a68b4bfd --- /dev/null +++ b/src/server/sanity_check_job.rs @@ -0,0 +1,131 @@ +use std::sync::Arc; + +use anyhow::{format_err, Error}; + +use proxmox_human_byte::HumanByte; +use proxmox_sys::{task_error, task_log}; + +use pbs_api_types::{ + Authid, Operation, SanityCheckJobOptions, Userid, DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT, +}; +use pbs_datastore::DataStore; +use proxmox_rest_server::WorkerTask; + +use crate::server::{jobstate::Job, lookup_user_email}; + +pub fn check_datastore_usage_full_threshold( + worker: Arc<WorkerTask>, + sanity_check_options: SanityCheckJobOptions, +) -> Result<Vec<String>, Error> { + let (config, _digest) = pbs_config::datastore::config()?; + let threshold = sanity_check_options + .datastore_usage_full_threshold + .unwrap_or(DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT); + let mut errors = Vec::new(); + + task_log!( + worker, + "Checking datastore usage levels with {threshold}% threshold ..." 
+ ); + for (store, (_, _)) in &config.sections { + let datastore = match DataStore::lookup_datastore(store, Some(Operation::Read)) { + Ok(datastore) => datastore, + Err(err) => { + let msg = format!("failed to lookup datastore - {err}"); + task_error!(worker, "{msg}"); + errors.push(msg); + continue; + } + }; + + let status = match proxmox_sys::fs::fs_info(&datastore.base_path()) { + Ok(status) => status, + Err(err) => { + let msg = format!("failed to get datastore status - {err}"); + task_error!(worker, "{msg}"); + errors.push(msg); + continue; + } + }; + + let used = (status.used as f64 / status.total as f64 * 100f64).trunc() as u8; + if used >= threshold { + let msg = format!( + "Datastore '{store}' exceeded usage threshold!\n used {} of {} ({used}%)", + HumanByte::from(status.used), + HumanByte::from(status.total), + ); + task_error!(worker, "{msg}"); + errors.push(msg); + } else { + task_log!( + worker, + "Datastore '{store}' below usage threshold, used {} of {} ({used}%)", + HumanByte::from(status.used), + HumanByte::from(status.total), + ); + } + } + + Ok(errors) +} + +pub fn do_sanity_check_job( + mut job: Job, + sanity_check_options: SanityCheckJobOptions, + auth_id: &Authid, + schedule: Option<String>, +) -> Result<String, Error> { + let worker_type = job.jobtype().to_string(); + let auth_id = auth_id.clone(); + + let notify_user = sanity_check_options + .notify_user + .as_ref() + .unwrap_or_else(|| Userid::root_userid()); + let email = lookup_user_email(notify_user); + + let upid_str = WorkerTask::new_thread( + &worker_type, + Some(job.jobname().to_string()), + auth_id.to_string(), + false, + move |worker| { + job.start(&worker.upid().to_string())?; + + task_log!(worker, "sanity check job '{}'", job.jobname()); + + if let Some(event_str) = schedule { + task_log!(worker, "task triggered by schedule '{event_str}'"); + } + + let result = check_datastore_usage_full_threshold(worker.clone(), sanity_check_options); + let job_result = match result { + Ok(ref errors) if 
errors.is_empty() => Ok(()), + Ok(_) => Err(format_err!( + "sanity check failed - please check the log for details" + )), + Err(_) => Err(format_err!("sanity check failed - job aborted")), + }; + + let status = worker.create_state(&job_result); + + if let Err(err) = job.finish(status) { + eprintln!("could not finish job state for {}: {err}", job.jobtype()); + } + + if let Some(email) = email { + task_log!(worker, "sending notification email to '{email}'"); + if let Err(err) = + crate::server::send_sanity_check_status(&email, None, job.jobname(), &result) + { + log::error!("send sanity check notification failed: {err}"); + } + } + + job_result + }, + )?; + + Ok(upid_str) +} -- 2.39.2