From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 8CC081FF185 for ; Wed, 06 May 2026 18:57:53 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 6FCAB2110; Wed, 6 May 2026 18:57:53 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v2 08/10] api/datastore: use maintenance-mode lock to protect against changes Date: Wed, 6 May 2026 18:56:49 +0200 Message-ID: <20260506165651.1322947-9-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260506165651.1322947-1-c.ebner@proxmox.com> References: <20260506165651.1322947-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778086520164 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.070 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: EEKTOL6XPP7SRTWDP5ER5776UOLYU4KT X-Message-ID-Hash: EEKTOL6XPP7SRTWDP5ER5776UOLYU4KT X-MailFrom: c.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Instead of locking the whole config, which causes also unrelated datastores to not be available, use a per-datastore maintenance-mode lock to protect against changes during potentially longer running operations such as s3-refresh or removable datastore mount/unmount. However keep the current semantic of first setting the maintenance mode, allow to still opt out from the mode while waiting for active operations to finish, and only then enforce the maintenance mode during the s3 refresh or removable datastore mount operation. unset_maintenance() is adapted to also check for the expected maintenance type and acquire the maintenance-mode lock if not provided by the caller. Fixes: https://forum.proxmox.com/threads/183244/ Signed-off-by: Christian Ebner --- pbs-datastore/src/datastore.rs | 2 ++ pbs-datastore/src/lib.rs | 21 +++++++++++- src/api2/admin/datastore.rs | 62 ++++++++++++++++++++-------------- src/api2/config/datastore.rs | 6 ++-- 4 files changed, 63 insertions(+), 28 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 84314cd29..d2136460e 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -3165,6 +3165,7 @@ impl DataStore { let (mut config, _digest) = pbs_config::datastore::config()?; let mut datastore_config: DataStoreConfig = config.lookup("datastore", name)?; + let maintenance_mode_lock = crate::maintenance_mode_lock(name, &config_lock)?; datastore_config.set_maintenance_mode(Some(MaintenanceMode { ty: MaintenanceType::Delete, message: None, @@ -3172,6 +3173,7 @@ impl DataStore { config.set_data(name, "datastore", &datastore_config)?; pbs_config::datastore::save_config(&config)?; + drop(maintenance_mode_lock); drop(config_lock); let (operations, _lock) = task_tracking::get_active_operations_locked(name)?; diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs index 17d26647e..c1f528889 100644 --- a/pbs-datastore/src/lib.rs +++ b/pbs-datastore/src/lib.rs @@ -159,8 +159,9 @@ use std::os::unix::io::AsRawFd; use std::path::Path; +use std::time::Duration; -use anyhow::{bail, Error}; +use anyhow::{bail, Context, Error}; use proxmox_sys::fs::CreateOptions; @@ -282,3 +283,21 @@ where Ok(lock) } + +/// Acquire an exclusive lock for the datastore's maintenance-mode. The datastore config lock +/// must be acquired beforehand to avoid races. +pub fn maintenance_mode_lock( + store: &str, + _datastore_config_lock: &BackupLockGuard, +) -> Result { + let mut lock_path = Path::new(DATASTORE_LOCKS_DIR).join(store); + lock_path.push("maintenance-mode.lck"); + + lock_helper(store, &lock_path, |p| { + // never wait for the lock here: this is only acquired by long running operations + // and the datastore config lock must be held anyways. Blocking the it longer than + // necessary is not acceptable. + pbs_config::open_backup_lockfile(p, Some(Duration::from_secs(0)), true) + .context("unable to acquire exclusive datastore's maintenance-mode lock") + }) +} diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index 1e0a78f3a..e5f05c809 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -61,8 +61,8 @@ use pbs_datastore::index::IndexFile; use pbs_datastore::manifest::BackupManifest; use pbs_datastore::prune::compute_prune_info; use pbs_datastore::{ - check_backup_owner, ensure_datastore_is_mounted, task_tracking, BackupDir, DataStore, - LocalChunkReader, StoreProgress, + check_backup_owner, ensure_datastore_is_mounted, maintenance_mode_lock, task_tracking, + BackupDir, DataStore, LocalChunkReader, StoreProgress, }; use pbs_tools::json::required_string_param; use proxmox_rest_server::{formatter, worker_is_active, WorkerTask}; @@ -2686,11 +2686,7 @@ pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result Result<(pbs_config::BackupLockGuard, DataStoreConfig), Error> { - let lock = pbs_config::datastore::lock_config()?; +fn expect_maintenance_type(store: &str, maintenance_type: MaintenanceType) -> Result<(), Error> { let (section_config, _digest) = pbs_config::datastore::config()?; let store_config: DataStoreConfig = section_config.lookup("datastore", store)?; @@ -2701,16 +2697,25 @@ fn expect_maintenance_type( bail!("maintenance mode is not '{maintenance_type}'"); } - Ok((lock, store_config)) + Ok(()) } fn unset_maintenance( - _lock: pbs_config::BackupLockGuard, - mut config: DataStoreConfig, + maintenance_mode_lock: Option, + store: &str, + expected: MaintenanceType, ) -> Result<(), Error> { + let lock = pbs_config::datastore::lock_config()?; + let _maintenance_mode_lock = match maintenance_mode_lock { + Some(lock) => lock, + None => pbs_datastore::maintenance_mode_lock(store, &lock)?, + }; + expect_maintenance_type(store, expected)?; let (mut section_config, _digest) = pbs_config::datastore::config()?; - config.maintenance_mode = None; - section_config.set_data(&config.name, "datastore", &config)?; + + let mut store_config: DataStoreConfig = section_config.lookup("datastore", store)?; + store_config.set_maintenance_mode(None)?; + section_config.set_data(store, "datastore", &store_config)?; pbs_config::datastore::save_config(§ion_config)?; Ok(()) } @@ -2759,10 +2764,11 @@ async fn do_unmount(store: String, auth_id: Authid, to_stdout: bool) -> Result Result Result Result Resu } /// Wait for no more active operations on the given datastore and run the provided callback in -/// a datastore locked context, protecting against maintenance mode changes. +/// a maintenance mode locked context, protecting against leaving the mode for the functions +/// lifetime. fn run_maintenance_locked( store: &str, maintenance_expected: MaintenanceType, @@ -2905,18 +2915,20 @@ fn run_maintenance_locked( } if aborted || worker.abort_requested() { - let _ = expect_maintenance_type(store, maintenance_expected) - .inspect_err(|e| warn!("maintenance mode was not as expected: {e}")) - .and_then(|(lock, config)| { - unset_maintenance(lock, config) - .inspect_err(|e| warn!("could not reset maintenance mode: {e}")) - }); + let _ = unset_maintenance(None, store, maintenance_expected) + .inspect_err(|e| warn!("could not reset maintenance mode: {e}")); bail!("aborted, due to user request"); } - let (lock, config) = expect_maintenance_type(store, maintenance_expected)?; + let lock = pbs_config::datastore::lock_config()?; + let maintenance_mode_lock = maintenance_mode_lock(store, &lock)?; + expect_maintenance_type(store, maintenance_expected)?; + // avoid blocking config access/updates, but keep maintenance-mode locked + // until done. + drop(lock); + callback()?; - unset_maintenance(lock, config) + unset_maintenance(Some(maintenance_mode_lock), store, maintenance_expected) .map_err(|e| format_err!("could not reset maintenance mode: {e}"))?; Ok(()) diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs index 70e349b2f..fe8e641a3 100644 --- a/src/api2/config/datastore.rs +++ b/src/api2/config/datastore.rs @@ -28,7 +28,7 @@ use crate::api2::config::tape_backup_job::{delete_tape_backup_job, list_tape_bac use crate::api2::config::verify::delete_verification_job; use pbs_config::CachedUserInfo; -use pbs_datastore::{get_datastore_mount_status, DataStore}; +use pbs_datastore::{get_datastore_mount_status, maintenance_mode_lock, DataStore}; use proxmox_rest_server::WorkerTask; use crate::server::jobstate; @@ -371,7 +371,7 @@ pub fn update_datastore( delete: Option>, digest: Option, ) -> Result<(), Error> { - let _lock = pbs_config::datastore::lock_config()?; + let lock = pbs_config::datastore::lock_config()?; // pass/compare digest let (mut config, expected_digest) = pbs_config::datastore::config()?; @@ -429,6 +429,7 @@ pub fn update_datastore( data.tuning = None; } DeletableProperty::MaintenanceMode => { + let _maintenance_mode_lock = maintenance_mode_lock(&name, &lock)?; data.set_maintenance_mode(None)?; } DeletableProperty::NotificationThresholds => { @@ -527,6 +528,7 @@ pub fn update_datastore( )?), None => None, }; + let _maintenance_mode_lock = maintenance_mode_lock(&name, &lock)?; data.set_maintenance_mode(maintenance_mode)?; } -- 2.47.3