From: Stefan Sterz <s.sterz@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [PATCH proxmox-backup v2 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run'
Date: Wed, 13 Apr 2022 11:11:49 +0200 [thread overview]
Message-ID: <20220413091151.150019-3-s.sterz@proxmox.com> (raw)
In-Reply-To: <20220413091151.150019-1-s.sterz@proxmox.com>
to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory and use a flat file structure. also adds
double stat'ing to make it possible to remove locks without certain
race condition issues.
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
pbs-config/src/lib.rs | 7 ++
pbs-datastore/src/datastore.rs | 126 +++++++++++++++++++++------
pbs-datastore/src/snapshot_reader.rs | 24 ++++-
src/api2/backup/environment.rs | 4 +-
src/backup/verify.rs | 4 +-
5 files changed, 131 insertions(+), 34 deletions(-)
diff --git a/pbs-config/src/lib.rs b/pbs-config/src/lib.rs
index 118030bc..81a7b243 100644
--- a/pbs-config/src/lib.rs
+++ b/pbs-config/src/lib.rs
@@ -21,6 +21,7 @@ pub use config_version_cache::ConfigVersionCache;
use anyhow::{format_err, Error};
use nix::unistd::{Gid, Group, Uid, User};
+use std::os::unix::prelude::AsRawFd;
pub use pbs_buildcfg::{BACKUP_USER_NAME, BACKUP_GROUP_NAME};
@@ -46,6 +47,12 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
pub struct BackupLockGuard(Option<std::fs::File>);
+impl AsRawFd for BackupLockGuard {
+ fn as_raw_fd(&self) -> i32 {
+ self.0.as_ref().map_or(-1, |f| f.as_raw_fd())
+ }
+}
+
#[doc(hidden)]
/// Note: do not use for production code, this is only intended for tests
pub unsafe fn create_mocked_lock() -> BackupLockGuard {
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 3895688d..92d544e5 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -1,5 +1,6 @@
use std::collections::{HashSet, HashMap};
use std::io::{self, Write};
+use std::os::unix::prelude::AsRawFd;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::convert::TryFrom;
@@ -11,14 +12,13 @@ use lazy_static::lazy_static;
use proxmox_schema::ApiType;
-use proxmox_sys::fs::{
- file_read_optional_string, lock_dir_noblock, lock_dir_noblock_shared, replace_file,
- CreateOptions, DirLockGuard,
-};
+use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
use proxmox_sys::process_locker::ProcessLockSharedGuard;
use proxmox_sys::WorkerTaskContext;
use proxmox_sys::{task_log, task_warn};
+use proxmox_sys::systemd::escape_unit;
+
use pbs_api_types::{
UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
ChunkOrder, DatastoreTuning, Operation,
@@ -42,6 +42,8 @@ lazy_static! {
static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStoreImpl>>> = Mutex::new(HashMap::new());
}
+pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks";
+
/// checks if auth_id is owner, or, if owner is a token, if
/// auth_id is the user of the token
pub fn check_backup_owner(
@@ -368,6 +370,8 @@ impl DataStore {
err,
)
})?;
+
+ let _ = std::fs::remove_file( self.group_lock_path(backup_group));
}
Ok(removed_all)
@@ -404,6 +408,8 @@ impl DataStore {
let _ = std::fs::remove_file(path);
}
+ let _ = std::fs::remove_file(self.snapshot_lock_path(backup_dir));
+
Ok(())
}
@@ -479,7 +485,7 @@ impl DataStore {
&self,
backup_group: &BackupGroup,
auth_id: &Authid,
- ) -> Result<(Authid, DirLockGuard), Error> {
+ ) -> Result<(Authid, BackupLockGuard), Error> {
// create intermediate path first:
let mut full_path = self.base_path();
full_path.push(backup_group.backup_type());
@@ -508,7 +514,7 @@ impl DataStore {
///
/// The BackupGroup directory needs to exist.
pub fn create_locked_backup_dir(&self, backup_dir: &BackupDir)
- -> Result<(PathBuf, bool, DirLockGuard), Error>
+ -> Result<(PathBuf, bool, BackupLockGuard), Error>
{
let relative_path = backup_dir.relative_path();
let mut full_path = self.base_path();
@@ -1002,30 +1008,96 @@ impl DataStore {
Ok(chunk_list)
}
- /// Lock a snapshot exclusively
- pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
- lock_dir_noblock(
- &self.snapshot_path(backup_dir),
- "snapshot",
- "possibly running or in use",
- )
+ /// Encodes a string so it can be used as a lock file name.
+ ///
+ /// The first 64 characters will be the sha256 hash of `path` then
+ /// a hyphen followed by up to 100 characters of the unit encoded
+ /// version of `path`.
+ fn lock_file_name_helper(&self, path: &str) -> String {
+ let enc = escape_unit(path, true);
+ let from = enc.len().checked_sub(100).unwrap_or(0);
+ let hash = hex::encode(openssl::sha::sha256(path.as_bytes()));
+
+ format!("{}-{}", hash, enc[from..].to_string())
+ }
+
+ /// Returns a file name for locking a snapshot.
+ ///
+ /// The lock file will be located in:
+ /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+ /// where `rpath` is the relative path of the snapshot.
+ fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> PathBuf {
+ let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+ let rpath = format!(
+ "{}/{}/{}",
+ backup_dir.group().backup_type(),
+ backup_dir.group().backup_id(),
+ backup_dir.backup_time_string(),
+ );
+
+ path.join(self.lock_file_name_helper(&rpath))
+ }
+
+ /// Returns a file name for locking a group.
+ ///
+ /// The lock file will be located in:
+ /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+ /// where `rpath` is the relative path of the group.
+ fn group_lock_path(&self, group: &BackupGroup) -> PathBuf {
+ let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+ let rpath = format!("{}/{}", group.backup_type(), group.backup_id());
+
+ path.join(self.lock_file_name_helper(&rpath))
+ }
+
+ /// Helps implement the double stat'ing procedure. It avoids
+ /// certain race conditions upon lock deletion.
+ ///
+ /// It also creates the base directory for lock files.
+ fn lock_helper<F>(&self, path: &std::path::Path, lock_fn: F) -> Result<BackupLockGuard, Error>
+ where
+ F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>,
+ {
+ let lock_dir = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+ std::fs::create_dir_all(&lock_dir)?;
+
+ let lock = lock_fn(path)?;
+
+ let stat = nix::sys::stat::fstat(lock.as_raw_fd())?.st_ino;
+
+ if nix::sys::stat::stat(path).map_or(true, |st| stat != st.st_ino) {
+ bail!("could not acquire lock, another thread modified the lock file");
+ }
+
+ Ok(lock)
+ }
+
+ /// Locks a snapshot exclusively.
+ pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+ format_err!("unable to acquire snapshot lock {:?} - {}", &p, err)
+ })
+ })
}
- /// Acquire a shared lock on a snapshot
- pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
- lock_dir_noblock_shared(
- &self.snapshot_path(backup_dir),
- "snapshot",
- "possibly running or in use",
- )
+ /// Acquires a shared lock on a snapshot.
+ pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| {
+ format_err!("unable to acquire shared snapshot lock {:?} - {}", &p, err)
+ })
+ })
}
- /// Lock a group exclusively
- pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
- lock_dir_noblock(
- &self.group_path(group),
- "backup group",
- "possible running backup",
- )
+ /// Locks a group exclusively.
+ pub fn lock_group(&self, group: &BackupGroup) -> Result<BackupLockGuard, Error> {
+ self.lock_helper(&self.group_lock_path(group), |p| {
+ open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+ format_err!("unable to acquire backup group lock {:?} - {}", &p, err)
+ })
+ })
}
}
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 0eddac14..7fde3238 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -3,8 +3,11 @@ use std::sync::Arc;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::fs::File;
-use anyhow::{bail, Error};
+use anyhow::{bail, format_err, Error};
use nix::dir::Dir;
+use nix::sys::stat::Mode;
+use nix::fcntl::OFlag;
+use pbs_config::BackupLockGuard;
use crate::backup_info::BackupDir;
use crate::index::IndexFile;
@@ -22,13 +25,22 @@ pub struct SnapshotReader {
datastore_name: String,
file_list: Vec<String>,
locked_dir: Dir,
+
+ // while this is never read, the lock needs to be kept until the
+ // reader is dropped to ensure valid locking semantics
+ _lock: BackupLockGuard,
}
impl SnapshotReader {
/// Lock snapshot, reads the manifest and returns a new instance
pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
- let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
+ let lock = datastore.lock_snapshot_shared(&snapshot)?;
+
+ let path = datastore.snapshot_path(&snapshot);
+
+ let locked_dir = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+ .map_err(|err| format_err!("unable to open snapshot directory {:?} - {}", path, err))?;
let datastore_name = datastore.name().to_string();
@@ -50,7 +62,13 @@ impl SnapshotReader {
file_list.push(CLIENT_LOG_BLOB_NAME.to_string());
}
- Ok(Self { snapshot, datastore_name, file_list, locked_dir })
+ Ok(Self {
+ snapshot,
+ datastore_name,
+ file_list,
+ locked_dir,
+ _lock: lock,
+ })
}
/// Return the snapshot directory
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 3e23840f..b4557d42 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,7 +1,7 @@
use anyhow::{bail, format_err, Error};
+use pbs_config::BackupLockGuard;
use std::sync::{Arc, Mutex};
use std::collections::HashMap;
-use nix::dir::Dir;
use ::serde::{Serialize};
use serde_json::{json, Value};
@@ -501,7 +501,7 @@ impl BackupEnvironment {
/// If verify-new is set on the datastore, this will run a new verify task
/// for the backup. If not, this will return and also drop the passed lock
/// immediately.
- pub fn verify_after_complete(&self, snap_lock: Dir) -> Result<(), Error> {
+ pub fn verify_after_complete(&self, snap_lock: BackupLockGuard) -> Result<(), Error> {
self.ensure_finished()?;
if !self.datastore.verify_new() {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index eea0e15d..f4375daa 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -1,4 +1,4 @@
-use nix::dir::Dir;
+use pbs_config::BackupLockGuard;
use std::collections::HashSet;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
@@ -324,7 +324,7 @@ pub fn verify_backup_dir_with_lock(
backup_dir: &BackupDir,
upid: UPID,
filter: Option<&dyn Fn(&BackupManifest) -> bool>,
- _snap_lock: Dir,
+ _snap_lock: BackupLockGuard,
) -> Result<bool, Error> {
let manifest = match verify_worker.datastore.load_manifest(backup_dir) {
Ok((manifest, _)) => manifest,
--
2.30.2
next prev parent reply other threads:[~2022-04-13 9:12 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-13 9:11 [pbs-devel] [PATCH proxmox-backup v2 0/4] refactor datastore locking to use tmpfs Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 1/4] fix #3935: datastore/api/backup: add locking helpers to datastore Stefan Sterz
2022-04-13 9:11 ` Stefan Sterz [this message]
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 3/4] fix #3935: datastore: move manifest locking to new locking method Stefan Sterz
2022-04-13 9:11 ` [pbs-devel] [PATCH proxmox-backup v2 4/4] fix: api: avoid race condition in set_backup_owner Stefan Sterz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220413091151.150019-3-s.sterz@proxmox.com \
--to=s.sterz@proxmox.com \
--cc=pbs-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.