From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id C286C1FF17A for ; Wed, 06 May 2026 18:57:55 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 259CA2171; Wed, 6 May 2026 18:57:54 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v2 10/10] datastore: protect datastore creation by maintenance-mode Date: Wed, 6 May 2026 18:56:51 +0200 Message-ID: <20260506165651.1322947-11-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260506165651.1322947-1-c.ebner@proxmox.com> References: <20260506165651.1322947-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778086520624 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.070 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: 27H4ATZGMZ6VUZCFEYTROL6XVO4ICR3C X-Message-ID-Hash: 27H4ATZGMZ6VUZCFEYTROL6XVO4ICR3C X-MailFrom: c.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Instead of holding the lock for the whole datastore creation, use the maintenance-mode `create`, protected by the maintenance-mode lock. It is inserted and written to the config when setting the maintenance mode before even starting any datastore related operation. Cleanup from config if creation failed must be performed manually. This has the advantage that a potentially long running creation does not block any concurrent access to the datastore config, thereby blocking also any unrelated datastore. The config has to be re-parsed a second time for this, but given that this unblocks the config and this is no performance critical path that tradeoff is better than its alternative. By refactoring the helpers to set and unset the maintenance mode, they can be reused for all call sites. Rely on re-acquisition of the lock to also succeed if other operations are being performed, due to the 10s timeout. Ordering of locking cannot always be preserved, but deadlocks are excluded by timeouts. Signed-off-by: Christian Ebner --- pbs-datastore/src/datastore.rs | 8 +--- src/api2/admin/datastore.rs | 39 ++-------------- src/api2/config/datastore.rs | 40 +++++++++++------ src/api2/helpers.rs | 77 +++++++++++++++++++++++++++++++- src/api2/node/disks/directory.rs | 27 +++++++++-- src/api2/node/disks/zfs.rs | 30 ++++++++++--- 6 files changed, 154 insertions(+), 67 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index d2136460e..79d83206b 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -477,7 +477,7 @@ impl DataStore { /// Create or reuse the chunk store from given datastore configuration pub fn construct_chunk_store( - store_config: &mut DataStoreConfig, + store_config: &DataStoreConfig, tuning: &DatastoreTuning, reuse_existing: bool, overwrite_in_use: bool, @@ -548,12 +548,6 @@ impl DataStore { }); } } - // starting out in maintenance mode s3-refresh, - // so no other operation will start until done with that. - store_config.set_maintenance_mode(Some(MaintenanceMode { - ty: MaintenanceType::S3Refresh, - message: None, - }))?; } let backup_user = pbs_config::backup_user()?; ChunkStore::create( diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index e5f05c809..086697fd9 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -68,6 +68,7 @@ use pbs_tools::json::required_string_param; use proxmox_rest_server::{formatter, worker_is_active, WorkerTask}; use crate::api2::backup::optional_ns_param; +use crate::api2::helpers::{expect_maintenance_type, unset_maintenance_mode}; use crate::api2::node::rrd::create_value_from_rrd; use crate::backup::{check_ns_privs_full, ListAccessibleBackupGroups, VerifyWorker, NS_PRIVS_OK}; use crate::server::jobstate::{compute_schedule_status, Job, JobState}; @@ -2686,40 +2687,6 @@ pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result Result<(), Error> { - let (section_config, _digest) = pbs_config::datastore::config()?; - let store_config: DataStoreConfig = section_config.lookup("datastore", store)?; - - if store_config - .get_maintenance_mode() - .is_none_or(|m| m.ty != maintenance_type) - { - bail!("maintenance mode is not '{maintenance_type}'"); - } - - Ok(()) -} - -fn unset_maintenance( - maintenance_mode_lock: Option, - store: &str, - expected: MaintenanceType, -) -> Result<(), Error> { - let lock = pbs_config::datastore::lock_config()?; - let _maintenance_mode_lock = match maintenance_mode_lock { - Some(lock) => lock, - None => pbs_datastore::maintenance_mode_lock(store, &lock)?, - }; - expect_maintenance_type(store, expected)?; - let (mut section_config, _digest) = pbs_config::datastore::config()?; - - let mut store_config: DataStoreConfig = section_config.lookup("datastore", store)?; - store_config.set_maintenance_mode(None)?; - section_config.set_data(store, "datastore", &store_config)?; - pbs_config::datastore::save_config(§ion_config)?; - Ok(()) -} - fn do_unmount_device( datastore: DataStoreConfig, worker: &dyn WorkerTaskContext, @@ -2915,7 +2882,7 @@ fn run_maintenance_locked( } if aborted || worker.abort_requested() { - let _ = unset_maintenance(None, store, maintenance_expected) + let _ = unset_maintenance_mode(None, store, maintenance_expected) .inspect_err(|e| warn!("could not reset maintenance mode: {e}")); bail!("aborted, due to user request"); } @@ -2928,7 +2895,7 @@ fn run_maintenance_locked( drop(lock); callback()?; - unset_maintenance(Some(maintenance_mode_lock), store, maintenance_expected) + unset_maintenance_mode(Some(maintenance_mode_lock), store, maintenance_expected) .map_err(|e| format_err!("could not reset maintenance mode: {e}"))?; Ok(()) diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs index 3061219ae..b451b4350 100644 --- a/src/api2/config/datastore.rs +++ b/src/api2/config/datastore.rs @@ -12,9 +12,9 @@ use proxmox_uuid::Uuid; use pbs_api_types::{ Authid, DataStoreConfig, DataStoreConfigUpdater, DatastoreBackendType, DatastoreNotify, - DatastoreTuning, KeepOptions, MaintenanceMode, PruneJobConfig, PruneJobOptions, - DATASTORE_SCHEMA, PRIV_DATASTORE_ALLOCATE, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_MODIFY, - PRIV_SYS_MODIFY, PROXMOX_CONFIG_DIGEST_SCHEMA, UPID_SCHEMA, + DatastoreTuning, KeepOptions, MaintenanceMode, MaintenanceType, PruneJobConfig, + PruneJobOptions, DATASTORE_SCHEMA, PRIV_DATASTORE_ALLOCATE, PRIV_DATASTORE_AUDIT, + PRIV_DATASTORE_MODIFY, PRIV_SYS_MODIFY, PROXMOX_CONFIG_DIGEST_SCHEMA, UPID_SCHEMA, }; use pbs_config::BackupLockGuard; @@ -31,6 +31,7 @@ use pbs_config::CachedUserInfo; use pbs_datastore::{get_datastore_mount_status, maintenance_mode_lock, DataStore}; use proxmox_rest_server::WorkerTask; +use crate::api2::helpers::{set_maintenance_type, unset_maintenance_mode}; use crate::server::jobstate; use crate::tools::disks::unmount_by_mountpoint; @@ -93,9 +94,9 @@ impl Drop for UnmountGuard { } pub(crate) fn do_create_datastore( - _lock: BackupLockGuard, - mut config: SectionConfigData, - mut datastore: DataStoreConfig, + _lock: &BackupLockGuard, + config: SectionConfigData, + datastore: &DataStoreConfig, reuse_datastore: bool, overwrite_in_use: bool, ) -> Result<(), Error> { @@ -122,11 +123,7 @@ pub(crate) fn do_create_datastore( UnmountGuard::new(None) }; - DataStore::construct_chunk_store(&mut datastore, &tuning, reuse_datastore, overwrite_in_use)?; - - config.set_data(&datastore.name, "datastore", &datastore)?; - - pbs_config::datastore::save_config(&config)?; + DataStore::construct_chunk_store(datastore, &tuning, reuse_datastore, overwrite_in_use)?; jobstate::create_state_file("garbage_collection", &datastore.name)?; @@ -232,12 +229,18 @@ pub fn create_datastore( } // clearing prune settings in the datastore config, as they are now handled by prune jobs - let config = DataStoreConfig { + let mut config = DataStoreConfig { prune_schedule: None, keep: KeepOptions::default(), ..config }; + // always start out in maintenance-mode create, so access can be blocked during creation + // without needing to hold the config lock, only the maintenance-mode lock. + let maintenance_mode_lock = pbs_datastore::maintenance_mode_lock(&config.name, &lock)?; + let mut current_type = MaintenanceType::Create; + set_maintenance_type(lock, &maintenance_mode_lock, &mut config, current_type)?; + let store_name = config.name.to_string(); WorkerTask::new_thread( @@ -247,9 +250,9 @@ pub fn create_datastore( to_stdout, move |worker| { do_create_datastore( - lock, + &maintenance_mode_lock, section_config, - config, + &config, reuse_datastore, overwrite_in_use, )?; @@ -259,8 +262,17 @@ pub fn create_datastore( } if reuse_datastore && backend == DatastoreBackendType::S3 { + // starting out in maintenance mode s3-refresh, + // so no other operation will start until done with that. + let lock = pbs_config::datastore::lock_config()?; + current_type = MaintenanceType::S3Refresh; + set_maintenance_type(lock, &maintenance_mode_lock, &mut config, current_type)?; + crate::api2::admin::datastore::do_s3_refresh(&store_name, &worker)?; } + + unset_maintenance_mode(Some(maintenance_mode_lock), &store_name, current_type)?; + Ok(()) }, ) diff --git a/src/api2/helpers.rs b/src/api2/helpers.rs index f346b0cca..9611d6151 100644 --- a/src/api2/helpers.rs +++ b/src/api2/helpers.rs @@ -1,12 +1,15 @@ use std::path::PathBuf; -use anyhow::Error; +use anyhow::{bail, Error}; use futures::stream::TryStreamExt; use hyper::{header, Response, StatusCode}; +use pbs_api_types::{DataStoreConfig, MaintenanceMode, MaintenanceType}; use proxmox_http::Body; use proxmox_router::http_bail; +use pbs_config::BackupLockGuard; + pub async fn create_download_response(path: PathBuf) -> Result, Error> { let file = match tokio::fs::File::open(path.clone()).await { Ok(file) => file, @@ -28,3 +31,75 @@ pub async fn create_download_response(path: PathBuf) -> Result, E .body(body) .unwrap()) } + +/// Check if the current datastore config reflects the expected maintenance type. +/// +/// Loads the config from file without locking. +pub(super) fn expect_maintenance_type( + store: &str, + maintenance_type: MaintenanceType, +) -> Result<(), Error> { + let (section_config, _digest) = pbs_config::datastore::config()?; + let store_config: DataStoreConfig = section_config.lookup("datastore", store)?; + + if store_config + .get_maintenance_mode() + .is_none_or(|m| m.ty != maintenance_type) + { + bail!("maintenance mode is not '{maintenance_type}'"); + } + + Ok(()) +} + +/// Sets and stores the maintenance mode for give store config. +/// +/// Requires and consumes thereby dropping the datastore config lock. +/// Requires but does not consume the maintenance-mode lock. +pub(super) fn set_maintenance_type( + _datastore_config_lock: BackupLockGuard, + _maintenance_mode_lock: &BackupLockGuard, + config: &mut DataStoreConfig, + maintenance_type: MaintenanceType, +) -> Result<(), Error> { + let (mut section_config, _digest) = pbs_config::datastore::config()?; + + config.set_maintenance_mode(Some(MaintenanceMode { + ty: maintenance_type, + message: None, + }))?; + + section_config.set_data(&config.name, "datastore", &config)?; + pbs_config::datastore::save_config(§ion_config) +} + +/// Unsets the maintenance-mode and updates the config for give datastore. +/// +/// Either acquires or consumes the provided maintenance-mode lock, +/// but always acquires the config lock. +pub(super) fn unset_maintenance_mode( + maintenance_mode_lock: Option, + store: &str, + expected: MaintenanceType, +) -> Result<(), Error> { + // It is fine to acquire the lock here in reverse order if passed in via the caller, + // as the config lock must never be held for long. + let lock = pbs_config::datastore::lock_config()?; + + let _maintenance_mode_lock = match maintenance_mode_lock { + Some(lock) => lock, + None => pbs_datastore::maintenance_mode_lock(store, &lock)?, + }; + + expect_maintenance_type(store, expected)?; + + let (mut section_config, _digest) = pbs_config::datastore::config()?; + let mut store_config: DataStoreConfig = section_config.lookup("datastore", store)?; + + store_config.set_maintenance_mode(None)?; + + section_config.set_data(store, "datastore", &store_config)?; + pbs_config::datastore::save_config(§ion_config)?; + + Ok(()) +} diff --git a/src/api2/node/disks/directory.rs b/src/api2/node/disks/directory.rs index c37d65b8d..2d7948f12 100644 --- a/src/api2/node/disks/directory.rs +++ b/src/api2/node/disks/directory.rs @@ -11,8 +11,8 @@ use proxmox_schema::api; use proxmox_section_config::SectionConfigData; use pbs_api_types::{ - DataStoreConfig, BLOCKDEVICE_NAME_SCHEMA, DATASTORE_MOUNT_DIR, DATASTORE_SCHEMA, NODE_SCHEMA, - PRIV_SYS_AUDIT, PRIV_SYS_MODIFY, UPID_SCHEMA, + DataStoreConfig, MaintenanceType, BLOCKDEVICE_NAME_SCHEMA, DATASTORE_MOUNT_DIR, + DATASTORE_SCHEMA, NODE_SCHEMA, PRIV_SYS_AUDIT, PRIV_SYS_MODIFY, UPID_SCHEMA, }; use crate::tools::disks::{ @@ -240,7 +240,7 @@ pub fn create_datastore_disk( if add_datastore { let lock = pbs_config::datastore::lock_config()?; - let datastore: DataStoreConfig = if removable_datastore { + let mut datastore: DataStoreConfig = if removable_datastore { serde_json::from_value( json!({ "name": name, "path": name, "backing-device": uuid }), )? @@ -253,8 +253,27 @@ pub fn create_datastore_disk( bail!("datastore '{}' already exists.", datastore.name); } + let maintenance_mode_lock = + pbs_datastore::maintenance_mode_lock(&datastore.name, &lock)?; + crate::api2::helpers::set_maintenance_type( + lock, + &maintenance_mode_lock, + &mut datastore, + MaintenanceType::Create, + )?; + crate::api2::config::datastore::do_create_datastore( - lock, config, datastore, false, false, + &maintenance_mode_lock, + config, + &datastore, + false, + false, + )?; + + crate::api2::helpers::unset_maintenance_mode( + Some(maintenance_mode_lock), + &datastore.name, + MaintenanceType::Create, )?; } diff --git a/src/api2/node/disks/zfs.rs b/src/api2/node/disks/zfs.rs index 3e5a7decf..ab48548c4 100644 --- a/src/api2/node/disks/zfs.rs +++ b/src/api2/node/disks/zfs.rs @@ -6,9 +6,9 @@ use proxmox_router::{Permission, Router, RpcEnvironment, RpcEnvironmentType}; use proxmox_schema::api; use pbs_api_types::{ - DataStoreConfig, ZfsCompressionType, ZfsRaidLevel, ZpoolListItem, DATASTORE_SCHEMA, - DISK_ARRAY_SCHEMA, DISK_LIST_SCHEMA, NODE_SCHEMA, PRIV_SYS_AUDIT, PRIV_SYS_MODIFY, UPID_SCHEMA, - ZFS_ASHIFT_SCHEMA, ZPOOL_NAME_SCHEMA, + DataStoreConfig, MaintenanceType, ZfsCompressionType, ZfsRaidLevel, ZpoolListItem, + DATASTORE_SCHEMA, DISK_ARRAY_SCHEMA, DISK_LIST_SCHEMA, NODE_SCHEMA, PRIV_SYS_AUDIT, + PRIV_SYS_MODIFY, UPID_SCHEMA, ZFS_ASHIFT_SCHEMA, ZPOOL_NAME_SCHEMA, }; use crate::tools::disks::{ @@ -304,7 +304,7 @@ pub fn create_zpool( if add_datastore { let lock = pbs_config::datastore::lock_config()?; - let datastore: DataStoreConfig = + let mut datastore: DataStoreConfig = serde_json::from_value(json!({ "name": name, "path": mount_point }))?; let (config, _digest) = pbs_config::datastore::config()?; @@ -313,8 +313,28 @@ pub fn create_zpool( bail!("datastore '{}' already exists.", datastore.name); } + let maintenance_mode_lock = + pbs_datastore::maintenance_mode_lock(&datastore.name, &lock)?; + + crate::api2::helpers::set_maintenance_type( + lock, + &maintenance_mode_lock, + &mut datastore, + MaintenanceType::Create, + )?; + crate::api2::config::datastore::do_create_datastore( - lock, config, datastore, false, false, + &maintenance_mode_lock, + config, + &datastore, + false, + false, + )?; + + crate::api2::helpers::unset_maintenance_mode( + Some(maintenance_mode_lock), + &datastore.name, + MaintenanceType::Create, )?; } -- 2.47.3