* [pbs-devel] [PATCH proxmox-backup v2 1/4] fix #3935: datastore/api/backup: add locking helpers to datastore
2022-04-13 9:11 [pbs-devel] [PATCH proxmox-backup v2 0/4] refactor datastore locking to use tmpfs Stefan Sterz
@ 2022-04-13 9:11 ` Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run' Stefan Sterz
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Stefan Sterz @ 2022-04-13 9:11 UTC (permalink / raw)
To: pbs-devel
to avoid duplicate code, add helpers for locking groups and snapshots
to the datastore type and refactor existing code to use them.
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
pbs-datastore/src/datastore.rs | 58 ++++++++++++++++++++--------
pbs-datastore/src/snapshot_reader.rs | 12 +-----
src/api2/backup/mod.rs | 4 +-
src/api2/reader/mod.rs | 7 +---
src/backup/verify.rs | 8 +---
5 files changed, 48 insertions(+), 41 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index d062d1cf..3895688d 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -11,11 +11,13 @@ use lazy_static::lazy_static;
use proxmox_schema::ApiType;
-use proxmox_sys::fs::{replace_file, file_read_optional_string, CreateOptions};
+use proxmox_sys::fs::{
+ file_read_optional_string, lock_dir_noblock, lock_dir_noblock_shared, replace_file,
+ CreateOptions, DirLockGuard,
+};
use proxmox_sys::process_locker::ProcessLockSharedGuard;
use proxmox_sys::WorkerTaskContext;
use proxmox_sys::{task_log, task_warn};
-use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard};
use pbs_api_types::{
UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
@@ -340,11 +342,9 @@ impl DataStore {
/// Remove a complete backup group including all snapshots, returns true
/// if all snapshots were removed, and false if some were protected
pub fn remove_backup_group(&self, backup_group: &BackupGroup) -> Result<bool, Error> {
-
+ let _guard = self.lock_group(backup_group)?;
let full_path = self.group_path(backup_group);
- let _guard = proxmox_sys::fs::lock_dir_noblock(&full_path, "backup group", "possible running backup")?;
-
log::info!("removing backup group {:?}", full_path);
let mut removed_all = true;
@@ -374,13 +374,11 @@ impl DataStore {
}
/// Remove a backup directory including all content
- pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
-
- let full_path = self.snapshot_path(backup_dir);
-
+ pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
let (_guard, _manifest_guard);
+
if !force {
- _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ _guard = self.lock_snapshot(backup_dir)?;
_manifest_guard = self.lock_manifest(backup_dir)?;
}
@@ -388,6 +386,8 @@ impl DataStore {
bail!("cannot remove protected snapshot");
}
+ let full_path = self.snapshot_path(backup_dir);
+
log::info!("removing backup snapshot {:?}", full_path);
std::fs::remove_dir_all(&full_path)
.map_err(|err| {
@@ -490,13 +490,13 @@ impl DataStore {
// create the last component now
match std::fs::create_dir(&full_path) {
Ok(_) => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
self.set_owner(backup_group, auth_id, false)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
@@ -514,8 +514,7 @@ impl DataStore {
let mut full_path = self.base_path();
full_path.push(&relative_path);
- let lock = ||
- lock_dir_noblock(&full_path, "snapshot", "internal error - tried creating snapshot that's already in use");
+ let lock = || self.lock_snapshot(backup_dir);
match std::fs::create_dir(&full_path) {
Ok(_) => Ok((relative_path, true, lock()?)),
@@ -935,9 +934,7 @@ impl DataStore {
backup_dir: &BackupDir,
protection: bool
) -> Result<(), Error> {
- let full_path = self.snapshot_path(backup_dir);
-
- let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ let _guard = self.lock_snapshot(backup_dir)?;
let protected_path = backup_dir.protected_file(self.base_path());
if protection {
@@ -1004,4 +1001,31 @@ impl DataStore {
Ok(chunk_list)
}
+
+ /// Lock a snapshot exclusively
+ pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ lock_dir_noblock(
+ &self.snapshot_path(backup_dir),
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Acquire a shared lock on a snapshot
+ pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ lock_dir_noblock_shared(
+ &self.snapshot_path(backup_dir),
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Lock a group exclusively
+ pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
+ lock_dir_noblock(
+ &self.group_path(group),
+ "backup group",
+ "possible running backup",
+ )
+ }
}
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index d7df3f23..0eddac14 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -6,8 +6,6 @@ use std::fs::File;
use anyhow::{bail, Error};
use nix::dir::Dir;
-use proxmox_sys::fs::lock_dir_noblock_shared;
-
use crate::backup_info::BackupDir;
use crate::index::IndexFile;
use crate::fixed_index::FixedIndexReader;
@@ -30,13 +28,7 @@ impl SnapshotReader {
/// Lock snapshot, reads the manifest and returns a new instance
pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
-
- let snapshot_path = datastore.snapshot_path(&snapshot);
-
- let locked_dir = lock_dir_noblock_shared(
- &snapshot_path,
- "snapshot",
- "locked by another operation")?;
+ let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
let datastore_name = datastore.name().to_string();
@@ -48,7 +40,7 @@ impl SnapshotReader {
}
};
- let mut client_log_path = snapshot_path;
+ let mut client_log_path = datastore.snapshot_path(&snapshot);
client_log_path.push(CLIENT_LOG_BLOB_NAME);
let mut file_list = Vec::new();
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 9d62dc52..075b6a42 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -20,7 +20,6 @@ use pbs_api_types::{
BACKUP_ID_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA,
CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_BACKUP, BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
use pbs_tools::json::{required_array_param, required_integer_param, required_string_param};
use pbs_config::CachedUserInfo;
use pbs_datastore::{DataStore, PROXMOX_BACKUP_PROTOCOL_ID_V1};
@@ -157,8 +156,7 @@ async move {
}
// lock last snapshot to prevent forgetting/pruning it during backup
- let full_path = datastore.snapshot_path(&last.backup_dir);
- Some(lock_dir_noblock_shared(&full_path, "snapshot", "base snapshot is already locked by another operation")?)
+ Some(datastore.lock_snapshot_shared(&last.backup_dir)?)
} else {
None
};
diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs
index 45cefe5d..b0438f45 100644
--- a/src/api2/reader/mod.rs
+++ b/src/api2/reader/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
BACKUP_ID_SCHEMA, CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_READ, PRIV_DATASTORE_BACKUP,
BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
use pbs_tools::json::{required_integer_param, required_string_param};
use pbs_datastore::{DataStore, PROXMOX_BACKUP_READER_PROTOCOL_ID_V1};
use pbs_datastore::backup_info::BackupDir;
@@ -114,10 +114,7 @@ fn upgrade_to_backup_reader_protocol(
}
}
- let _guard = lock_dir_noblock_shared(
- &datastore.snapshot_path(&backup_dir),
- "snapshot",
- "locked by another operation")?;
+ let _guard = datastore.lock_snapshot_shared(&backup_dir)?;
let path = datastore.base_path();
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 307d366c..eea0e15d 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -13,7 +13,6 @@ use pbs_datastore::{DataStore, DataBlob, StoreProgress};
use pbs_datastore::backup_info::{BackupGroup, BackupDir, BackupInfo};
use pbs_datastore::index::IndexFile;
use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, FileInfo};
-use proxmox_sys::fs::lock_dir_noblock_shared;
use crate::tools::parallel_handler::ParallelHandler;
@@ -300,11 +299,8 @@ pub fn verify_backup_dir(
upid: UPID,
filter: Option<&dyn Fn(&BackupManifest) -> bool>,
) -> Result<bool, Error> {
- let snap_lock = lock_dir_noblock_shared(
- &verify_worker.datastore.snapshot_path(backup_dir),
- "snapshot",
- "locked by another operation",
- );
+ let snap_lock = verify_worker.datastore.lock_snapshot_shared(&backup_dir);
+
match snap_lock {
Ok(snap_lock) => {
verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock)
--
2.30.2
^ permalink raw reply [flat|nested] 5+ messages in thread
* [pbs-devel] [PATCH proxmox-backup v2 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run'
2022-04-13 9:11 [pbs-devel] [PATCH proxmox-backup v2 0/4] refactor datastore locking to use tmpfs Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 1/4] fix #3935: datastore/api/backup: add locking helpers to datastore Stefan Sterz
@ 2022-04-13 9:11 ` Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 3/4] fix #3935: datastore: move manifest locking to new locking method Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 4/4] fix: api: avoid race condition in set_backup_owner Stefan Sterz
3 siblings, 0 replies; 5+ messages in thread
From: Stefan Sterz @ 2022-04-13 9:11 UTC (permalink / raw)
To: pbs-devel
to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory and use a flat file structure. also adds
double stat'ing so that locks can be removed without running into
certain race conditions.
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
pbs-config/src/lib.rs | 7 ++
pbs-datastore/src/datastore.rs | 126 +++++++++++++++++++++------
pbs-datastore/src/snapshot_reader.rs | 24 ++++-
src/api2/backup/environment.rs | 4 +-
src/backup/verify.rs | 4 +-
5 files changed, 131 insertions(+), 34 deletions(-)
diff --git a/pbs-config/src/lib.rs b/pbs-config/src/lib.rs
index 118030bc..81a7b243 100644
--- a/pbs-config/src/lib.rs
+++ b/pbs-config/src/lib.rs
@@ -21,6 +21,7 @@ pub use config_version_cache::ConfigVersionCache;
use anyhow::{format_err, Error};
use nix::unistd::{Gid, Group, Uid, User};
+use std::os::unix::prelude::AsRawFd;
pub use pbs_buildcfg::{BACKUP_USER_NAME, BACKUP_GROUP_NAME};
@@ -46,6 +47,12 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
pub struct BackupLockGuard(Option<std::fs::File>);
+impl AsRawFd for BackupLockGuard {
+ fn as_raw_fd(&self) -> i32 {
+ self.0.as_ref().map_or(-1, |f| f.as_raw_fd())
+ }
+}
+
#[doc(hidden)]
/// Note: do not use for production code, this is only intended for tests
pub unsafe fn create_mocked_lock() -> BackupLockGuard {
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 3895688d..92d544e5 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -1,5 +1,6 @@
use std::collections::{HashSet, HashMap};
use std::io::{self, Write};
+use std::os::unix::prelude::AsRawFd;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::convert::TryFrom;
@@ -11,14 +12,13 @@ use lazy_static::lazy_static;
use proxmox_schema::ApiType;
-use proxmox_sys::fs::{
- file_read_optional_string, lock_dir_noblock, lock_dir_noblock_shared, replace_file,
- CreateOptions, DirLockGuard,
-};
+use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
use proxmox_sys::process_locker::ProcessLockSharedGuard;
use proxmox_sys::WorkerTaskContext;
use proxmox_sys::{task_log, task_warn};
+use proxmox_sys::systemd::escape_unit;
+
use pbs_api_types::{
UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
ChunkOrder, DatastoreTuning, Operation,
@@ -42,6 +42,8 @@ lazy_static! {
static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStoreImpl>>> = Mutex::new(HashMap::new());
}
+pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks";
+
/// checks if auth_id is owner, or, if owner is a token, if
/// auth_id is the user of the token
pub fn check_backup_owner(
@@ -368,6 +370,8 @@ impl DataStore {
err,
)
})?;
+
+ let _ = std::fs::remove_file( self.group_lock_path(backup_group));
}
Ok(removed_all)
@@ -404,6 +408,8 @@ impl DataStore {
let _ = std::fs::remove_file(path);
}
+ let _ = std::fs::remove_file(self.snapshot_lock_path(backup_dir));
+
Ok(())
}
@@ -479,7 +485,7 @@ impl DataStore {
&self,
backup_group: &BackupGroup,
auth_id: &Authid,
- ) -> Result<(Authid, DirLockGuard), Error> {
+ ) -> Result<(Authid, BackupLockGuard), Error> {
// create intermediate path first:
let mut full_path = self.base_path();
full_path.push(backup_group.backup_type());
@@ -508,7 +514,7 @@ impl DataStore {
///
/// The BackupGroup directory needs to exist.
pub fn create_locked_backup_dir(&self, backup_dir: &BackupDir)
- -> Result<(PathBuf, bool, DirLockGuard), Error>
+ -> Result<(PathBuf, bool, BackupLockGuard), Error>
{
let relative_path = backup_dir.relative_path();
let mut full_path = self.base_path();
@@ -1002,30 +1008,96 @@ impl DataStore {
Ok(chunk_list)
}
- /// Lock a snapshot exclusively
- pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
- lock_dir_noblock(
- &self.snapshot_path(backup_dir),
- "snapshot",
- "possibly running or in use",
- )
+ /// Encodes a string so it can be used as a lock file name.
+ ///
+ /// The first 64 characters will be the sha256 hash of `path` then
+ /// a hyphen followed by up to 100 characters of the unit encoded
+ /// version of `path`.
+ fn lock_file_name_helper(&self, path: &str) -> String {
+ let enc = escape_unit(path, true);
+ let from = enc.len().checked_sub(100).unwrap_or(0);
+ let hash = hex::encode(openssl::sha::sha256(path.as_bytes()));
+
+ format!("{}-{}", hash, enc[from..].to_string())
+ }
+
+ /// Returns a file name for locking a snapshot.
+ ///
+ /// The lock file will be located in:
+ /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+ /// where `rpath` is the relative path of the snapshot.
+ fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> PathBuf {
+ let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+ let rpath = format!(
+ "{}/{}/{}",
+ backup_dir.group().backup_type(),
+ backup_dir.group().backup_id(),
+ backup_dir.backup_time_string(),
+ );
+
+ path.join(self.lock_file_name_helper(&rpath))
+ }
+
+ /// Returns a file name for locking a group.
+ ///
+ /// The lock file will be located in:
+ /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+ /// where `rpath` is the relative path of the group.
+ fn group_lock_path(&self, group: &BackupGroup) -> PathBuf {
+ let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+ let rpath = format!("{}/{}", group.backup_type(), group.backup_id());
+
+ path.join(self.lock_file_name_helper(&rpath))
+ }
+
+ /// Helps implement the double stat'ing procedure. It avoids
+ /// certain race conditions upon lock deletion.
+ ///
+ /// It also creates the base directory for lock files.
+ fn lock_helper<F>(&self, path: &std::path::Path, lock_fn: F) -> Result<BackupLockGuard, Error>
+ where
+ F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>,
+ {
+ let lock_dir = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+ std::fs::create_dir_all(&lock_dir)?;
+
+ let lock = lock_fn(path)?;
+
+ let stat = nix::sys::stat::fstat(lock.as_raw_fd())?.st_ino;
+
+ if nix::sys::stat::stat(path).map_or(true, |st| stat != st.st_ino) {
+ bail!("could not acquire lock, another thread modified the lock file");
+ }
+
+ Ok(lock)
+ }
+
+ /// Locks a snapshot exclusively.
+ pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+ format_err!("unable to acquire snapshot lock {:?} - {}", &p, err)
+ })
+ })
}
- /// Acquire a shared lock on a snapshot
- pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
- lock_dir_noblock_shared(
- &self.snapshot_path(backup_dir),
- "snapshot",
- "possibly running or in use",
- )
+ /// Acquires a shared lock on a snapshot.
+ pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| {
+ format_err!("unable to acquire shared snapshot lock {:?} - {}", &p, err)
+ })
+ })
}
- /// Lock a group exclusively
- pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
- lock_dir_noblock(
- &self.group_path(group),
- "backup group",
- "possible running backup",
- )
+ /// Locks a group exclusively.
+ pub fn lock_group(&self, group: &BackupGroup) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.group_lock_path(group), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+ format_err!("unable to acquire backup group lock {:?} - {}", &p, err)
+ })
+ })
}
}
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 0eddac14..7fde3238 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -3,8 +3,11 @@ use std::sync::Arc;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::fs::File;
-use anyhow::{bail, Error};
+use anyhow::{bail, format_err, Error};
use nix::dir::Dir;
+use nix::sys::stat::Mode;
+use nix::fcntl::OFlag;
+use pbs_config::BackupLockGuard;
use crate::backup_info::BackupDir;
use crate::index::IndexFile;
@@ -22,13 +25,22 @@ pub struct SnapshotReader {
datastore_name: String,
file_list: Vec<String>,
locked_dir: Dir,
+
+ // while this is never read, the lock needs to be kept until the
+ // reader is dropped to ensure valid locking semantics
+ _lock: BackupLockGuard,
}
impl SnapshotReader {
/// Lock snapshot, reads the manifest and returns a new instance
pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
- let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
+ let lock = datastore.lock_snapshot_shared(&snapshot)?;
+
+ let path = datastore.snapshot_path(&snapshot);
+
+ let locked_dir = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+ .map_err(|err| format_err!("unable to open snapshot directory {:?} - {}", path, err))?;
let datastore_name = datastore.name().to_string();
@@ -50,7 +62,13 @@ impl SnapshotReader {
file_list.push(CLIENT_LOG_BLOB_NAME.to_string());
}
- Ok(Self { snapshot, datastore_name, file_list, locked_dir })
+ Ok(Self {
+ snapshot,
+ datastore_name,
+ file_list,
+ locked_dir,
+ _lock: lock,
+ })
}
/// Return the snapshot directory
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 3e23840f..b4557d42 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,7 +1,7 @@
use anyhow::{bail, format_err, Error};
+use pbs_config::BackupLockGuard;
use std::sync::{Arc, Mutex};
use std::collections::HashMap;
-use nix::dir::Dir;
use ::serde::{Serialize};
use serde_json::{json, Value};
@@ -501,7 +501,7 @@ impl BackupEnvironment {
/// If verify-new is set on the datastore, this will run a new verify task
/// for the backup. If not, this will return and also drop the passed lock
/// immediately.
- pub fn verify_after_complete(&self, snap_lock: Dir) -> Result<(), Error> {
+ pub fn verify_after_complete(&self, snap_lock: BackupLockGuard) -> Result<(), Error> {
self.ensure_finished()?;
if !self.datastore.verify_new() {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index eea0e15d..f4375daa 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -1,4 +1,4 @@
-use nix::dir::Dir;
+use pbs_config::BackupLockGuard;
use std::collections::HashSet;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
@@ -324,7 +324,7 @@ pub fn verify_backup_dir_with_lock(
backup_dir: &BackupDir,
upid: UPID,
filter: Option<&dyn Fn(&BackupManifest) -> bool>,
- _snap_lock: Dir,
+ _snap_lock: BackupLockGuard,
) -> Result<bool, Error> {
let manifest = match verify_worker.datastore.load_manifest(backup_dir) {
Ok((manifest, _)) => manifest,
--
2.30.2
^ permalink raw reply [flat|nested] 5+ messages in thread
* [pbs-devel] [PATCH proxmox-backup v2 3/4] fix #3935: datastore: move manifest locking to new locking method
2022-04-13 9:11 [pbs-devel] [PATCH proxmox-backup v2 0/4] refactor datastore locking to use tmpfs Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 1/4] fix #3935: datastore/api/backup: add locking helpers to datastore Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run' Stefan Sterz
@ 2022-04-13 9:11 ` Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 4/4] fix: api: avoid race condition in set_backup_owner Stefan Sterz
3 siblings, 0 replies; 5+ messages in thread
From: Stefan Sterz @ 2022-04-13 9:11 UTC (permalink / raw)
To: pbs-devel
adds double stat'ing and removes the directory hierarchy to bring
manifest locking in line with the other locks used by the datastore
type.
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
pbs-datastore/src/datastore.rs | 56 +++++++++++++---------------------
1 file changed, 21 insertions(+), 35 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 92d544e5..803dcef1 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -32,9 +32,7 @@ use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
use crate::index::IndexFile;
use crate::manifest::{
- MANIFEST_BLOB_NAME, MANIFEST_LOCK_NAME, CLIENT_LOG_BLOB_NAME,
- ArchiveType, BackupManifest,
- archive_type,
+ archive_type, ArchiveType, BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME,
};
use crate::task_tracking::update_active_operations;
@@ -403,10 +401,7 @@ impl DataStore {
})?;
// the manifest does not exists anymore, we do not need to keep the lock
- if let Ok(path) = self.manifest_lock_path(backup_dir) {
- // ignore errors
- let _ = std::fs::remove_file(path);
- }
+ let _ = std::fs::remove_file(self.manifest_lock_path(backup_dir));
let _ = std::fs::remove_file(self.snapshot_lock_path(backup_dir));
@@ -858,41 +853,32 @@ impl DataStore {
))
}
- /// Returns the filename to lock a manifest
+ /// Returns the filename to lock a manifest.
///
- /// Also creates the basedir. The lockfile is located in
- /// '/run/proxmox-backup/locks/{datastore}/{type}/{id}/{timestamp}.index.json.lck'
- fn manifest_lock_path(
- &self,
- backup_dir: &BackupDir,
- ) -> Result<String, Error> {
- let mut path = format!(
- "/run/proxmox-backup/locks/{}/{}/{}",
- self.name(),
+ /// The lock file will be located in:
+ /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+ /// where rpath is the relative path of the snapshot appended with "manifest".
+ fn manifest_lock_path(&self, backup_dir: &BackupDir) -> PathBuf {
+ let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+ let rpath = format!(
+ "{}/{}/{}-manifest",
backup_dir.group().backup_type(),
backup_dir.group().backup_id(),
+ backup_dir.backup_time_string(),
);
- std::fs::create_dir_all(&path)?;
- use std::fmt::Write;
- write!(path, "/{}{}", backup_dir.backup_time_string(), &MANIFEST_LOCK_NAME)?;
- Ok(path)
+ path.join(self.lock_file_name_helper(&rpath))
}
- fn lock_manifest(
- &self,
- backup_dir: &BackupDir,
- ) -> Result<BackupLockGuard, Error> {
- let path = self.manifest_lock_path(backup_dir)?;
-
- // update_manifest should never take a long time, so if someone else has
- // the lock we can simply block a bit and should get it soon
- open_backup_lockfile(&path, Some(Duration::from_secs(5)), true)
- .map_err(|err| {
- format_err!(
- "unable to acquire manifest lock {:?} - {}", &path, err
- )
- })
+ fn lock_manifest(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.manifest_lock_path(backup_dir), |p| {
+ // update_manifest should never take a long time, so if
+ // someone else has the lock we can simply block a bit
+ // and should get it soon
+ open_backup_lockfile(&p, Some(Duration::from_secs(5)), true)
+ .map_err(|err| format_err!("unable to acquire manifest lock {:?} - {}", &p, err))
+ })
}
/// Load the manifest without a lock. Must not be written back.
--
2.30.2
^ permalink raw reply [flat|nested] 5+ messages in thread
* [pbs-devel] [PATCH proxmox-backup v2 4/4] fix: api: avoid race condition in set_backup_owner
2022-04-13 9:11 [pbs-devel] [PATCH proxmox-backup v2 0/4] refactor datastore locking to use tmpfs Stefan Sterz
` (2 preceding siblings ...)
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 3/4] fix #3935: datastore: move manifest locking to new locking method Stefan Sterz
@ 2022-04-13 9:11 ` Stefan Sterz
3 siblings, 0 replies; 5+ messages in thread
From: Stefan Sterz @ 2022-04-13 9:11 UTC (permalink / raw)
To: pbs-devel
when two clients changed the owner of a backup group concurrently, a
race condition could arise. add locking to avoid this.
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
src/api2/admin/datastore.rs | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index 45a576ea..3dbf2246 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -1914,11 +1914,12 @@ pub fn set_backup_owner(
let privs = user_info.lookup_privs(&auth_id, &["datastore", &store]);
+ let owner = datastore.get_owner(&backup_group)?;
+
let allowed = if (privs & PRIV_DATASTORE_MODIFY) != 0 {
// High-privilege user/token
true
} else if (privs & PRIV_DATASTORE_BACKUP) != 0 {
- let owner = datastore.get_owner(&backup_group)?;
match (owner.is_token(), new_owner.is_token()) {
(true, true) => {
@@ -1965,6 +1966,12 @@ pub fn set_backup_owner(
new_owner);
}
+ let _guard = datastore.lock_group(&backup_group)?;
+
+ if owner != datastore.get_owner(&backup_group)? {
+ bail!("{} does not own this group anymore", owner);
+ }
+
datastore.set_owner(&backup_group, &new_owner, true)?;
Ok(())
--
2.30.2
^ permalink raw reply [flat|nested] 5+ messages in thread