From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <pbs-devel-bounces@lists.proxmox.com> Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 6B6D91FF18C for <inbox@lore.proxmox.com>; Mon, 24 Mar 2025 13:52:26 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 766C61B280; Mon, 24 Mar 2025 13:52:22 +0100 (CET) From: Shannon Sterz <s.sterz@proxmox.com> To: pbs-devel@lists.proxmox.com Date: Mon, 24 Mar 2025 13:51:56 +0100 Message-Id: <20250324125157.165976-3-s.sterz@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20250324125157.165976-1-s.sterz@proxmox.com> References: <20250324125157.165976-1-s.sterz@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.017 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pbs-devel] [PATCH proxmox-backup v8 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run' X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion <pbs-devel.lists.proxmox.com> List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=unsubscribe> List-Archive: <http://lists.proxmox.com/pipermail/pbs-devel/> List-Post: <mailto:pbs-devel@lists.proxmox.com> List-Help: <mailto:pbs-devel-request@lists.proxmox.com?subject=help> List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel>, <mailto:pbs-devel-request@lists.proxmox.com?subject=subscribe> Reply-To: Proxmox Backup Server development discussion <pbs-devel@lists.proxmox.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" <pbs-devel-bounces@lists.proxmox.com> to avoid issues when removing a group or snapshot directory where two threads hold a lock to the same directory, move locking to the tmpfs backed '/run' directory. also adds double stat'ing to make it possible to remove locks without certain race condition issues. this new mechanism is only employed when we can be sure, that a reboot has occured so that all processes are using the new locking mechanism. otherwise, two separate process could assume they have exclusive rights to a group or snapshot. bumps the rust version to 1.81 so we can use `std::fs::exists` without issue. Signed-off-by: Shannon Sterz <s.sterz@proxmox.com> --- Cargo.toml | 2 +- debian/postinst | 5 + pbs-config/src/lib.rs | 32 ++++- pbs-datastore/Cargo.toml | 1 + pbs-datastore/src/backup_info.rs | 196 +++++++++++++++++++++++---- pbs-datastore/src/datastore.rs | 6 +- pbs-datastore/src/snapshot_reader.rs | 17 ++- src/api2/backup/environment.rs | 5 +- src/backup/verify.rs | 4 +- src/server/sync.rs | 4 +- 10 files changed, 230 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 306d50e9..645f2d81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ authors = [ edition = "2021" license = "AGPL-3" repository = "https://git.proxmox.com/?p=proxmox-backup.git" -rust-version = "1.80" +rust-version = "1.81" [package] name = "proxmox-backup" diff --git a/debian/postinst b/debian/postinst index 0485ca34..81c15826 100644 --- a/debian/postinst +++ b/debian/postinst @@ -80,6 +80,11 @@ EOF update_sync_job "$prev_job" fi fi + + if dpkg --compare-versions "$2" 'lt' '3.3.4~'; then + # ensure old locking is used by the daemon until a reboot happened + touch "/run/proxmox-backup/old-locking" + fi fi ;; diff --git a/pbs-config/src/lib.rs b/pbs-config/src/lib.rs index 20a8238d..9c4d77c2 100644 --- a/pbs-config/src/lib.rs +++ b/pbs-config/src/lib.rs @@ -22,6 +22,8 @@ pub use config_version_cache::ConfigVersionCache; use anyhow::{format_err, Error}; use nix::unistd::{Gid, Group, Uid, User}; +use proxmox_sys::fs::DirLockGuard; +use std::os::unix::prelude::AsRawFd; pub use pbs_buildcfg::{BACKUP_GROUP_NAME, BACKUP_USER_NAME}; @@ -46,13 +48,34 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> { } pub struct BackupLockGuard { - _file: Option<std::fs::File>, + file: Option<std::fs::File>, + // TODO: Remove `_legacy_dir` with PBS 5 + _legacy_dir: Option<DirLockGuard>, +} + +impl AsRawFd for BackupLockGuard { + fn as_raw_fd(&self) -> i32 { + self.file.as_ref().map_or(-1, |f| f.as_raw_fd()) + } +} + +// TODO: Remove with PBS 5 +impl From<DirLockGuard> for BackupLockGuard { + fn from(value: DirLockGuard) -> Self { + Self { + file: None, + _legacy_dir: Some(value), + } + } } #[doc(hidden)] /// Note: do not use for production code, this is only intended for tests pub unsafe fn create_mocked_lock() -> BackupLockGuard { - BackupLockGuard { _file: None } + BackupLockGuard { + file: None, + _legacy_dir: None, + } } /// Open or create a lock file owned by user "backup" and lock it. @@ -76,7 +99,10 @@ pub fn open_backup_lockfile<P: AsRef<std::path::Path>>( let timeout = timeout.unwrap_or(std::time::Duration::new(10, 0)); let file = proxmox_sys::fs::open_file_locked(&path, timeout, exclusive, options)?; - Ok(BackupLockGuard { _file: Some(file) }) + Ok(BackupLockGuard { + file: Some(file), + _legacy_dir: None, + }) } /// Atomically write data to file owned by "root:backup" with permission "0640" diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml index 4ebc5fdc..7623adc2 100644 --- a/pbs-datastore/Cargo.toml +++ b/pbs-datastore/Cargo.toml @@ -35,6 +35,7 @@ proxmox-lang.workspace=true proxmox-schema = { workspace = true, features = [ "api-macro" ] } proxmox-serde = { workspace = true, features = [ "serde_json" ] } proxmox-sys.workspace = true +proxmox-systemd.workspace = true proxmox-time.workspace = true proxmox-uuid.workspace = true proxmox-worker-task.workspace = true diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs index c7f7ed53..b79e8196 100644 --- a/pbs-datastore/src/backup_info.rs +++ b/pbs-datastore/src/backup_info.rs @@ -1,13 +1,15 @@ use std::fmt; -use std::os::unix::io::RawFd; +use std::os::unix::io::{AsRawFd, RawFd}; +use std::os::unix::prelude::OsStrExt; +use std::path::Path; use std::path::PathBuf; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; +use std::time::Duration; use anyhow::{bail, format_err, Context, Error}; -use proxmox_sys::fs::{ - lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions, DirLockGuard, -}; +use proxmox_sys::fs::{lock_dir_noblock, lock_dir_noblock_shared, replace_file, CreateOptions}; +use proxmox_systemd::escape_unit; use pbs_api_types::{ Authid, BackupGroupDeleteStats, BackupNamespace, BackupType, GroupFilter, VerifyState, @@ -18,6 +20,18 @@ use pbs_config::{open_backup_lockfile, BackupLockGuard}; use crate::manifest::{BackupManifest, MANIFEST_LOCK_NAME}; use crate::{DataBlob, DataStore}; +pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks"; + +// TODO: Remove with PBS 5 +// Note: The `expect()` call here will only happen if we can neither confirm nor deny the existence +// of the file. this should only happen if a user messes with the `/run/proxmox-backup` directory. +// if that happens, a lot more should fail as we rely on the existence of the directory throughout +// the code. so just panic with a reasonable message. +static OLD_LOCKING: LazyLock<bool> = LazyLock::new(|| { + std::fs::exists("/run/proxmox-backup/old-locking") + .expect("cannot read `/run/proxmox-backup`, please check permissions") +}); + /// BackupGroup is a directory containing a list of BackupDir #[derive(Clone)] pub struct BackupGroup { @@ -225,6 +239,7 @@ impl BackupGroup { delete_stats.increment_removed_groups(); } + let _ = std::fs::remove_file(self.lock_path()); Ok(delete_stats) } @@ -241,13 +256,34 @@ impl BackupGroup { .set_owner(&self.ns, self.as_ref(), auth_id, force) } - /// Lock a group exclusively - pub fn lock(&self) -> Result<DirLockGuard, Error> { - lock_dir_noblock( - &self.full_group_path(), - "backup group", - "possible running backup", - ) + /// Returns a file name for locking a group. + /// + /// The lock file will be located in: + /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_path_helper(rpath)}` + /// where `rpath` is the relative path of the group. + fn lock_path(&self) -> PathBuf { + let path = Path::new(DATASTORE_LOCKS_DIR).join(self.store.name()); + + let rpath = Path::new(self.group.ty.as_str()).join(&self.group.id); + + path.join(lock_file_path_helper(&self.ns, rpath)) + } + + /// Locks a group exclusively. + pub fn lock(&self) -> Result<BackupLockGuard, Error> { + if *OLD_LOCKING { + lock_dir_noblock( + &self.full_group_path(), + "backup group", + "possible runing backup, group is in use", + ) + .map(BackupLockGuard::from) + } else { + lock_helper(self.store.name(), &self.lock_path(), |p| { + open_backup_lockfile(p, Some(Duration::from_secs(0)), true) + .map_err(|err| format_err!("unable to acquire backup group lock {p:?} - {err}")) + }) + } } } @@ -454,22 +490,54 @@ impl BackupDir { .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &path, err)) } - /// Lock this snapshot exclusively - pub fn lock(&self) -> Result<DirLockGuard, Error> { - lock_dir_noblock( - &self.full_path(), - "snapshot", - "backup is running or snapshot is in use", - ) + /// Returns a file name for locking a snapshot. + /// + /// The lock file will be located in: + /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_path_helper(rpath)}` + /// where `rpath` is the relative path of the snapshot. + fn lock_path(&self) -> PathBuf { + let path = Path::new(DATASTORE_LOCKS_DIR).join(self.store.name()); + + let rpath = Path::new(self.dir.group.ty.as_str()) + .join(&self.dir.group.id) + .join(&self.backup_time_string); + + path.join(lock_file_path_helper(&self.ns, rpath)) } - /// Acquire a shared lock on this snapshot - pub fn lock_shared(&self) -> Result<DirLockGuard, Error> { - lock_dir_noblock_shared( - &self.full_path(), - "snapshot", - "backup is running or snapshot is in use, could not acquire shared lock", - ) + /// Locks a snapshot exclusively. + pub fn lock(&self) -> Result<BackupLockGuard, Error> { + if *OLD_LOCKING { + lock_dir_noblock( + &self.full_path(), + "snapshot", + "backup is running or snapshot is in use", + ) + .map(BackupLockGuard::from) + } else { + lock_helper(self.store.name(), &self.lock_path(), |p| { + open_backup_lockfile(p, Some(Duration::from_secs(0)), true) + .map_err(|err| format_err!("unable to acquire snapshot lock {p:?} - {err}")) + }) + } + } + + /// Acquires a shared lock on a snapshot. + pub fn lock_shared(&self) -> Result<BackupLockGuard, Error> { + if *OLD_LOCKING { + lock_dir_noblock_shared( + &self.full_path(), + "snapshot", + "backup is running or snapshot is in use, could not acquire shared lock", + ) + .map(BackupLockGuard::from) + } else { + lock_helper(self.store.name(), &self.lock_path(), |p| { + open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| { + format_err!("unable to acquire shared snapshot lock {p:?} - {err}") + }) + }) + } } /// Destroy the whole snapshot, bails if it's protected @@ -494,11 +562,13 @@ impl BackupDir { format_err!("removing backup snapshot {:?} failed - {}", full_path, err,) })?; - // the manifest doesn't exist anymore, no need to keep the lock (already done by guard?) + // remove no longer needed lock files if let Ok(path) = self.manifest_lock_path() { let _ = std::fs::remove_file(path); // ignore errors } + let _ = std::fs::remove_file(self.lock_path()); // ignore errors + Ok(()) } @@ -692,3 +762,75 @@ fn list_backup_files<P: ?Sized + nix::NixPath>( Ok(files) } + +/// Creates a path to a lock file depending on the relative path of an object (snapshot, group, +/// manifest) in a datastore. First all namespaces will be concatenated with a colon (ns-folder). +/// Then the actual file name will depend on the length of the relative path without namespaces. If +/// it is shorter than 255 characters in its unit encoded form, than the unit encoded form will be +/// used directly. If not, the file name will consist of the first 80 character, the last 80 +/// characters and the hash of the unit encoded relative path without namespaces. It will also be +/// placed into a "hashed" subfolder in the namespace folder. +/// +/// Examples: +/// +/// - vm-100 +/// - vm-100-2022\x2d05\x2d02T08\x3a11\x3a33Z +/// - ns1:ns2:ns3:ns4:ns5:ns6:ns7/vm-100-2022\x2d05\x2d02T08\x3a11\x3a33Z +/// +/// A "hashed" lock file would look like this: +/// - ns1:ns2:ns3/hashed/$first_eighty...$last_eighty-$hash +fn lock_file_path_helper(ns: &BackupNamespace, path: PathBuf) -> PathBuf { + let to_return = PathBuf::from( + ns.components() + .map(String::from) + .reduce(|acc, n| format!("{acc}:{n}")) + .unwrap_or_default(), + ); + + let path_bytes = path.as_os_str().as_bytes(); + + let enc = escape_unit(path_bytes, true); + + if enc.len() < 255 { + return to_return.join(enc); + } + + let to_return = to_return.join("hashed"); + + let first_eigthy = &enc[..80]; + let last_eighty = &enc[enc.len() - 80..]; + let hash = hex::encode(openssl::sha::sha256(path_bytes)); + + to_return.join(format!("{first_eigthy}...{last_eighty}-{hash}")) +} + +/// Helps implement the double stat'ing procedure. It avoids certain race conditions upon lock +/// deletion. +/// +/// It also creates the base directory for lock files. +fn lock_helper<F>( + store_name: &str, + path: &std::path::Path, + lock_fn: F, +) -> Result<BackupLockGuard, Error> +where + F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>, +{ + let mut lock_dir = Path::new(DATASTORE_LOCKS_DIR).join(store_name); + + if let Some(parent) = path.parent() { + lock_dir = lock_dir.join(parent); + }; + + std::fs::create_dir_all(&lock_dir)?; + + let lock = lock_fn(path)?; + + let stat = nix::sys::stat::fstat(lock.as_raw_fd())?.st_ino; + + if nix::sys::stat::stat(path).map_or(true, |st| stat != st.st_ino) { + bail!("could not acquire lock, another thread modified the lock file"); + } + + Ok(lock) +} diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 1cbd0038..1e6157c0 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -13,7 +13,6 @@ use proxmox_human_byte::HumanByte; use proxmox_schema::ApiType; use proxmox_sys::error::SysError; -use proxmox_sys::fs::DirLockGuard; use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions}; use proxmox_sys::linux::procfs::MountInfo; use proxmox_sys::process_locker::ProcessLockSharedGuard; @@ -24,6 +23,7 @@ use pbs_api_types::{ DataStoreConfig, DatastoreFSyncLevel, DatastoreTuning, GarbageCollectionStatus, MaintenanceMode, MaintenanceType, Operation, UPID, }; +use pbs_config::BackupLockGuard; use crate::backup_info::{BackupDir, BackupGroup}; use crate::chunk_store::ChunkStore; @@ -778,7 +778,7 @@ impl DataStore { ns: &BackupNamespace, backup_group: &pbs_api_types::BackupGroup, auth_id: &Authid, - ) -> Result<(Authid, DirLockGuard), Error> { + ) -> Result<(Authid, BackupLockGuard), Error> { let backup_group = self.backup_group(ns.clone(), backup_group.clone()); // create intermediate path first @@ -816,7 +816,7 @@ impl DataStore { self: &Arc<Self>, ns: &BackupNamespace, backup_dir: &pbs_api_types::BackupDir, - ) -> Result<(PathBuf, bool, DirLockGuard), Error> { + ) -> Result<(PathBuf, bool, BackupLockGuard), Error> { let backup_dir = self.backup_dir(ns.clone(), backup_dir.clone())?; let relative_path = backup_dir.relative_path(); diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs index edff19ef..20fd388a 100644 --- a/pbs-datastore/src/snapshot_reader.rs +++ b/pbs-datastore/src/snapshot_reader.rs @@ -4,8 +4,11 @@ use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use anyhow::{bail, Context, Error}; +use anyhow::{bail, format_err, Context, Error}; use nix::dir::Dir; +use nix::fcntl::OFlag; +use nix::sys::stat::Mode; +use pbs_config::BackupLockGuard; use pbs_api_types::{ print_store_and_ns, ArchiveType, BackupNamespace, Operation, CLIENT_LOG_BLOB_NAME, @@ -26,6 +29,10 @@ pub struct SnapshotReader { datastore_name: String, file_list: Vec<String>, locked_dir: Dir, + + // while this is never read, the lock needs to be kept until the + // reader is dropped to ensure valid locking semantics + _lock: BackupLockGuard, } impl SnapshotReader { @@ -46,10 +53,15 @@ impl SnapshotReader { bail!("snapshot {} does not exist!", snapshot.dir()); } - let locked_dir = snapshot + let lock = snapshot .lock_shared() .with_context(|| format!("while trying to read snapshot '{snapshot:?}'"))?; + let locked_dir = + Dir::open(&snapshot_path, OFlag::O_RDONLY, Mode::empty()).map_err(|err| { + format_err!("unable to open snapshot directory {snapshot_path:?} - {err}") + })?; + let datastore_name = datastore.name().to_string(); let manifest = match snapshot.load_manifest() { Ok((manifest, _)) => manifest, @@ -79,6 +91,7 @@ impl SnapshotReader { datastore_name, file_list, locked_dir, + _lock: lock, }) } diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index 7943b576..3d541b46 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -1,5 +1,6 @@ use anyhow::{bail, format_err, Context, Error}; -use nix::dir::Dir; +use pbs_config::BackupLockGuard; + use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tracing::info; @@ -635,7 +636,7 @@ impl BackupEnvironment { /// If verify-new is set on the datastore, this will run a new verify task /// for the backup. If not, this will return and also drop the passed lock /// immediately. - pub fn verify_after_complete(&self, excl_snap_lock: Dir) -> Result<(), Error> { + pub fn verify_after_complete(&self, excl_snap_lock: BackupLockGuard) -> Result<(), Error> { self.ensure_finished()?; if !self.datastore.verify_new() { diff --git a/src/backup/verify.rs b/src/backup/verify.rs index 10a64fda..3d2cba8a 100644 --- a/src/backup/verify.rs +++ b/src/backup/verify.rs @@ -1,10 +1,10 @@ +use pbs_config::BackupLockGuard; use std::collections::HashSet; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; use std::time::Instant; use anyhow::{bail, Error}; -use proxmox_sys::fs::DirLockGuard; use tracing::{error, info, warn}; use proxmox_worker_task::WorkerTaskContext; @@ -330,7 +330,7 @@ pub fn verify_backup_dir_with_lock( backup_dir: &BackupDir, upid: UPID, filter: Option<&dyn Fn(&BackupManifest) -> bool>, - _snap_lock: DirLockGuard, + _snap_lock: BackupLockGuard, ) -> Result<bool, Error> { let datastore_name = verify_worker.datastore.name(); let backup_dir_name = backup_dir.dir(); diff --git a/src/server/sync.rs b/src/server/sync.rs index b6852aff..10804b14 100644 --- a/src/server/sync.rs +++ b/src/server/sync.rs @@ -10,7 +10,7 @@ use std::time::Duration; use anyhow::{bail, format_err, Context, Error}; use futures::{future::FutureExt, select}; use hyper::http::StatusCode; -use proxmox_sys::fs::DirLockGuard; +use pbs_config::BackupLockGuard; use serde_json::json; use tracing::{info, warn}; @@ -106,7 +106,7 @@ pub(crate) struct RemoteSourceReader { } pub(crate) struct LocalSourceReader { - pub(crate) _dir_lock: Arc<Mutex<DirLockGuard>>, + pub(crate) _dir_lock: Arc<Mutex<BackupLockGuard>>, pub(crate) path: PathBuf, pub(crate) datastore: Arc<DataStore>, } -- 2.39.5 _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel