From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pbs-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 455FE1FF164 for <inbox@lore.proxmox.com>; Fri, 14 Mar 2025 09:30:57 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id DD026368CE; Fri, 14 Mar 2025 09:30:48 +0100 (CET) Message-ID: <10c3183b-4c24-43f7-ba10-1fdfadbd9690@proxmox.com> Date: Fri, 14 Mar 2025 09:30:02 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com>, Shannon Sterz <s.sterz@proxmox.com> References: <20250311135200.309896-1-s.sterz@proxmox.com> <20250311135200.309896-2-s.sterz@proxmox.com> Content-Language: en-US, de-DE From: Christian Ebner <c.ebner@proxmox.com> In-Reply-To: <20250311135200.309896-2-s.sterz@proxmox.com> X-SPAM-LEVEL: Spam detection results: 0 AWL 0.031 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [verify.rs, sync.rs, datastore.rs, self.store, environment.rs, mod.rs] Subject: Re: [pbs-devel] [PATCH proxmox-backup v7 1/4] datastore/api/backup: prepare for fix of #3935 by adding lock helpers X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/> List-Post: <mailto:pbs-devel@lists.proxmox.com> List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com> Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com> On 3/11/25 14:51, Shannon Sterz wrote: > to avoid duplicate code, add helpers for locking groups and snapshots > to the BackupGroup and BackupDir traits respectively and refactor > existing code to use them. > > this also changes the error semantics of the refactored > functions as locking related error messages are now unified and, thus, > less specific. The callside specific context could however be readded to some extend by using anyhow::Context, adapt the callside error formatting and keeping the would_bock_msg even more generic like `locked by another operation`? This would help narrowing down issues more easily. > > Signed-off-by: Shannon Sterz <s.sterz@proxmox.com> > --- > pbs-datastore/src/backup_info.rs | 39 +++++++++++++--- > pbs-datastore/src/datastore.rs | 66 ++++++++++------------------ > pbs-datastore/src/snapshot_reader.rs | 7 +-- > src/api2/backup/environment.rs | 9 +--- > src/api2/backup/mod.rs | 8 +--- > src/api2/reader/mod.rs | 7 +-- > src/backup/verify.rs | 12 ++--- > src/server/sync.rs | 10 ++--- > 8 files changed, 68 insertions(+), 90 deletions(-) > > diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs > index ef2b982c..42ce74e9 100644 > --- a/pbs-datastore/src/backup_info.rs > +++ b/pbs-datastore/src/backup_info.rs > @@ -5,7 +5,9 @@ use std::sync::Arc; > > use anyhow::{bail, format_err, Error}; > > -use proxmox_sys::fs::{lock_dir_noblock, replace_file, CreateOptions}; > +use proxmox_sys::fs::{ > + lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions, DirLockGuard, > +}; > > use pbs_api_types::{ > Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, GroupFilter, VerifyState, > @@ -199,9 +201,8 @@ impl BackupGroup { > /// Returns `BackupGroupDeleteStats`, containing the number of deleted snapshots > /// and number of protected snaphsots, which therefore were not removed. > pub fn destroy(&self) -> Result<BackupGroupDeleteStats, Error> { > + let _guard = self.lock()?; > let path = self.full_group_path(); > - let _guard = > - proxmox_sys::fs::lock_dir_noblock(&path, "backup group", "possible running backup")?; > > log::info!("removing backup group {:?}", path); > let mut delete_stats = BackupGroupDeleteStats::default(); > @@ -237,6 +238,15 @@ impl BackupGroup { > self.store > .set_owner(&self.ns, self.as_ref(), auth_id, force) > } > + > + /// Lock a group exclusively > + pub fn lock(&self) -> Result<DirLockGuard, Error> { > + lock_dir_noblock( > + &self.full_group_path(), > + "backup group", > + "possible running backup", > + ) > + } > } > > impl AsRef<pbs_api_types::BackupNamespace> for BackupGroup { > @@ -442,15 +452,31 @@ impl BackupDir { > .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &path, err)) > } > > + /// Lock this snapshot exclusively > + pub fn lock(&self) -> Result<DirLockGuard, Error> { > + lock_dir_noblock( > + &self.full_path(), > + "snapshot", > + "backup is running or snapshot is in use", > + ) > + } > + > + /// Acquire a shared lock on this snapshot > + pub fn lock_shared(&self) -> Result<DirLockGuard, Error> { > + lock_dir_noblock_shared( > + &self.full_path(), > + "snapshot", > + "backup is running or snapshot is in use, could not acquire shared lock", > + ) > + } > + > /// Destroy the whole snapshot, bails if it's protected > /// > /// Setting `force` to true skips locking and thus ignores if the backup is currently in use. > pub fn destroy(&self, force: bool) -> Result<(), Error> { > - let full_path = self.full_path(); > - > let (_guard, _manifest_guard); > if !force { > - _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; > + _guard = self.lock()?; > _manifest_guard = self.lock_manifest()?; > } > > @@ -458,6 +484,7 @@ impl BackupDir { > bail!("cannot remove protected snapshot"); // use special error type? > } > > + let full_path = self.full_path(); > log::info!("removing backup snapshot {:?}", full_path); > std::fs::remove_dir_all(&full_path).map_err(|err| { > format_err!("removing backup snapshot {:?} failed - {}", full_path, err,) > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index 75c0c16a..b3804b5a 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -13,8 +13,8 @@ use proxmox_human_byte::HumanByte; > use proxmox_schema::ApiType; > > use proxmox_sys::error::SysError; > +use proxmox_sys::fs::DirLockGuard; > use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; > -use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard}; > use proxmox_sys::linux::procfs::MountInfo; > use proxmox_sys::process_locker::ProcessLockSharedGuard; > use proxmox_worker_task::WorkerTaskContext; > @@ -774,41 +774,31 @@ impl DataStore { > /// > /// This also acquires an exclusive lock on the directory and returns the lock guard. > pub fn create_locked_backup_group( > - &self, > + self: &Arc<Self>, > ns: &BackupNamespace, > backup_group: &pbs_api_types::BackupGroup, > auth_id: &Authid, > ) -> Result<(Authid, DirLockGuard), Error> { > - // create intermediate path first: > - let mut full_path = self.base_path(); > - for ns in ns.components() { > - full_path.push("ns"); > - full_path.push(ns); > - } > - full_path.push(backup_group.ty.as_str()); > - std::fs::create_dir_all(&full_path)?; > + let backup_group = self.backup_group(ns.clone(), backup_group.clone()); > > - full_path.push(&backup_group.id); > + // create intermediate path first > + let full_path = backup_group.full_group_path(); > > - // create the last component now > + std::fs::create_dir_all(full_path.parent().ok_or_else(|| { > + format_err!("could not construct parent path for group {backup_group:?}") > + })?)?; > + > + // now create the group, this allows us to check whether it existed before > match std::fs::create_dir(&full_path) { > Ok(_) => { > - let guard = lock_dir_noblock( > - &full_path, > - "backup group", > - "another backup is already running", > - )?; > - self.set_owner(ns, backup_group, auth_id, false)?; > - let owner = self.get_owner(ns, backup_group)?; // just to be sure > + let guard = backup_group.lock()?; > + self.set_owner(ns, backup_group.group(), auth_id, false)?; > + let owner = self.get_owner(ns, backup_group.group())?; // just to be sure > Ok((owner, guard)) > } > Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => { > - let guard = lock_dir_noblock( > - &full_path, > - "backup group", > - "another backup is already running", > - )?; > - let owner = self.get_owner(ns, backup_group)?; // just to be sure > + let guard = backup_group.lock()?; > + let owner = self.get_owner(ns, backup_group.group())?; // just to be sure > Ok((owner, guard)) > } > Err(err) => bail!("unable to create backup group {:?} - {}", full_path, err), > @@ -819,29 +809,17 @@ impl DataStore { > /// > /// The BackupGroup directory needs to exist. > pub fn create_locked_backup_dir( > - &self, > + self: &Arc<Self>, > ns: &BackupNamespace, > backup_dir: &pbs_api_types::BackupDir, > ) -> Result<(PathBuf, bool, DirLockGuard), Error> { > - let full_path = self.snapshot_path(ns, backup_dir); > - let relative_path = full_path.strip_prefix(self.base_path()).map_err(|err| { > - format_err!( > - "failed to produce correct path for backup {backup_dir} in namespace {ns}: {err}" > - ) > - })?; > + let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?; > + let relative_path = backup_dir.relative_path(); > > - let lock = || { > - lock_dir_noblock( > - &full_path, > - "snapshot", > - "internal error - tried creating snapshot that's already in use", > - ) > - }; > - > - match std::fs::create_dir(&full_path) { > - Ok(_) => Ok((relative_path.to_owned(), true, lock()?)), > + match std::fs::create_dir(backup_dir.full_path()) { > + Ok(_) => Ok((relative_path, true, backup_dir.lock()?)), > Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => { > - Ok((relative_path.to_owned(), false, lock()?)) > + Ok((relative_path, false, backup_dir.lock()?)) > } > Err(e) => Err(e.into()), > } > @@ -1305,7 +1283,7 @@ impl DataStore { > bail!("snapshot {} does not exist!", backup_dir.dir()); > } > > - let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?; > + let _guard = backup_dir.lock()?; > > let protected_path = backup_dir.protected_file(); > if protection { > diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs > index 36349888..604d35c1 100644 > --- a/pbs-datastore/src/snapshot_reader.rs > +++ b/pbs-datastore/src/snapshot_reader.rs > @@ -1,14 +1,12 @@ > use std::fs::File; > use std::os::unix::io::{AsRawFd, FromRawFd}; > use std::path::Path; > -use std::sync::Arc; > use std::rc::Rc; > +use std::sync::Arc; nit: unrelated hunk, does not apply cleanly anymore > > use anyhow::{bail, Error}; > use nix::dir::Dir; > > -use proxmox_sys::fs::lock_dir_noblock_shared; > - > use pbs_api_types::{ > print_store_and_ns, ArchiveType, BackupNamespace, Operation, CLIENT_LOG_BLOB_NAME, > MANIFEST_BLOB_NAME, > @@ -48,8 +46,7 @@ impl SnapshotReader { > bail!("snapshot {} does not exist!", snapshot.dir()); > } > > - let locked_dir = > - lock_dir_noblock_shared(&snapshot_path, "snapshot", "locked by another operation")?; > + let locked_dir = snapshot.lock_shared()?; > > let datastore_name = datastore.name().to_string(); > let manifest = match snapshot.load_manifest() { > diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs > index 99d885e2..133932b3 100644 > --- a/src/api2/backup/environment.rs > +++ b/src/api2/backup/environment.rs > @@ -8,7 +8,7 @@ use ::serde::Serialize; > use serde_json::{json, Value}; > > use proxmox_router::{RpcEnvironment, RpcEnvironmentType}; > -use proxmox_sys::fs::{lock_dir_noblock_shared, replace_file, CreateOptions}; > +use proxmox_sys::fs::{replace_file, CreateOptions}; > > use pbs_api_types::Authid; > use pbs_datastore::backup_info::{BackupDir, BackupInfo}; > @@ -645,12 +645,7 @@ impl BackupEnvironment { > > // Downgrade to shared lock, the backup itself is finished > drop(excl_snap_lock); > - let snap_lock = lock_dir_noblock_shared( > - &self.backup_dir.full_path(), > - "snapshot", > - "snapshot is already locked by another operation", > - )?; > - > + let snap_lock = self.backup_dir.lock_shared()?; > let worker_id = format!( > "{}:{}/{}/{:08X}", > self.datastore.name(), > diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs > index 82c6438a..d5513893 100644 > --- a/src/api2/backup/mod.rs > +++ b/src/api2/backup/mod.rs > @@ -17,7 +17,6 @@ use proxmox_router::{ > }; > use proxmox_schema::*; > use proxmox_sortable_macro::sortable; > -use proxmox_sys::fs::lock_dir_noblock_shared; > > use pbs_api_types::{ > ArchiveType, Authid, BackupNamespace, BackupType, Operation, VerifyState, > @@ -186,12 +185,7 @@ fn upgrade_to_backup_protocol( > } > > // lock last snapshot to prevent forgetting/pruning it during backup > - let full_path = last.backup_dir.full_path(); > - Some(lock_dir_noblock_shared( > - &full_path, > - "snapshot", > - "base snapshot is already locked by another operation", > - )?) > + Some(last.backup_dir.lock_shared()?) > } else { > None > }; > diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs > index 328141c8..5a9678ff 100644 > --- a/src/api2/reader/mod.rs > +++ b/src/api2/reader/mod.rs > @@ -16,7 +16,6 @@ use proxmox_router::{ > }; > use proxmox_schema::{BooleanSchema, ObjectSchema}; > use proxmox_sortable_macro::sortable; > -use proxmox_sys::fs::lock_dir_noblock_shared; > > use pbs_api_types::{ > ArchiveType, Authid, Operation, BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, > @@ -129,11 +128,7 @@ fn upgrade_to_backup_reader_protocol( > bail!("snapshot {} does not exist.", backup_dir.dir()); > } > > - let _guard = lock_dir_noblock_shared( > - &backup_dir.full_path(), > - "snapshot", > - "locked by another operation", > - )?; > + let _guard = backup_dir.lock_shared()?; > > let path = datastore.base_path(); > > diff --git a/src/backup/verify.rs b/src/backup/verify.rs > index f652c262..10a64fda 100644 > --- a/src/backup/verify.rs > +++ b/src/backup/verify.rs > @@ -4,10 +4,9 @@ use std::sync::{Arc, Mutex}; > use std::time::Instant; > > use anyhow::{bail, Error}; > -use nix::dir::Dir; > +use proxmox_sys::fs::DirLockGuard; > use tracing::{error, info, warn}; > > -use proxmox_sys::fs::lock_dir_noblock_shared; > use proxmox_worker_task::WorkerTaskContext; > > use pbs_api_types::{ > @@ -307,11 +306,8 @@ pub fn verify_backup_dir( > return Ok(true); > } > > - let snap_lock = lock_dir_noblock_shared( > - &backup_dir.full_path(), > - "snapshot", > - "locked by another operation", > - ); > + let snap_lock = backup_dir.lock_shared(); > + > match snap_lock { > Ok(snap_lock) => { > verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock) > @@ -334,7 +330,7 @@ pub fn verify_backup_dir_with_lock( > backup_dir: &BackupDir, > upid: UPID, > filter: Option<&dyn Fn(&BackupManifest) -> bool>, > - _snap_lock: Dir, > + _snap_lock: DirLockGuard, > ) -> Result<bool, Error> { > let datastore_name = verify_worker.datastore.name(); > let backup_dir_name = backup_dir.dir(); > diff --git a/src/server/sync.rs b/src/server/sync.rs > index 4dd46c5a..abe7316a 100644 > --- a/src/server/sync.rs > +++ b/src/server/sync.rs > @@ -10,6 +10,7 @@ use std::time::Duration; > use anyhow::{bail, format_err, Context, Error}; > use futures::{future::FutureExt, select}; > use hyper::http::StatusCode; > +use proxmox_sys::fs::DirLockGuard; > use serde_json::json; > use tracing::{info, warn}; > > @@ -105,7 +106,7 @@ pub(crate) struct RemoteSourceReader { > } > > pub(crate) struct LocalSourceReader { > - pub(crate) _dir_lock: Arc<Mutex<proxmox_sys::fs::DirLockGuard>>, > + pub(crate) _dir_lock: Arc<Mutex<DirLockGuard>>, > pub(crate) path: PathBuf, > pub(crate) datastore: Arc<DataStore>, > } > @@ -478,13 +479,8 @@ impl SyncSource for LocalSource { > dir: &BackupDir, > ) -> Result<Arc<dyn SyncSourceReader>, Error> { > let dir = self.store.backup_dir(ns.clone(), dir.clone())?; > - let dir_lock = proxmox_sys::fs::lock_dir_noblock_shared( > - &dir.full_path(), > - "snapshot", > - "locked by another operation", > - )?; > Ok(Arc::new(LocalSourceReader { > - _dir_lock: Arc::new(Mutex::new(dir_lock)), > + _dir_lock: Arc::new(Mutex::new(dir.lock()?)), > path: dir.full_path(), > datastore: dir.datastore().clone(), > })) _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel