From: Stefan Sterz <s.sterz@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [pbs-devel] [RFC PATCH 1/5] fix #3935: datastore/api2/backup: move datastore locking to '/run'
Date: Fri, 18 Mar 2022 15:06:51 +0100
Message-ID: <20220318140655.170656-2-s.sterz@proxmox.com>
In-Reply-To: <20220318140655.170656-1-s.sterz@proxmox.com>
to avoid issues when removing a group or snapshot directory while two
threads hold a lock on the same directory, move locking to the
tmpfs-backed '/run' directory
Signed-off-by: Stefan Sterz <s.sterz@proxmox.com>
---
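Note (not part of the commit message): instead of locking the snapshot and
group directories in the datastore itself, the new helpers derive a per-datastore
lock directory under the tmpfs-backed '/run' hierarchy, so removing the actual
snapshot or group directory no longer invalidates a lock a concurrent thread is
holding. A minimal standalone sketch of how such a lock path is derived,
mirroring the snapshot_lock_path() helper added below (the free-standing
function name is purely illustrative):

```rust
// Illustrative sketch only, not part of the patch: derive a lock path under
// the tmpfs-backed /run hierarchy for a snapshot, mirroring the new
// snapshot_lock_path() helper in this diff. The function name is hypothetical.
use std::path::{Path, PathBuf};

fn example_snapshot_lock_path(store: &str, snapshot_rel_path: &str) -> std::io::Result<PathBuf> {
    // e.g. "/run/proxmox-backup/locks/<store>/<backup-type>/<backup-id>/<timestamp>"
    let path = Path::new("/run/proxmox-backup/locks")
        .join(store)
        .join(snapshot_rel_path);

    // the directory has to exist before it can be opened and locked
    std::fs::create_dir_all(&path)?;

    Ok(path)
}
```

Since '/run' is a tmpfs, any leftover lock directories also disappear on reboot
rather than accumulating inside the datastore.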
pbs-datastore/src/datastore.rs | 74 ++++++++++++++++++++++------
pbs-datastore/src/snapshot_reader.rs | 12 +----
src/api2/backup/mod.rs | 5 +-
src/api2/reader/mod.rs | 7 +--
src/backup/verify.rs | 8 +--
5 files changed, 66 insertions(+), 40 deletions(-)
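For context, lock_dir_noblock()/lock_dir_noblock_shared() from proxmox_sys
operate on a directory; conceptually this amounts to opening the (now
/run-based) lock directory and taking a non-blocking flock on it, roughly as in
the following sketch (an assumption about the mechanism for illustration, not
the proxmox_sys implementation):

```rust
// Conceptual sketch (assumption, not the proxmox_sys code): take a non-blocking
// exclusive flock on a lock directory under /run. The real helpers additionally
// weave the "what"/"msg" strings into the error message.
use std::fs::File;
use std::os::unix::io::AsRawFd;
use std::path::Path;

use nix::fcntl::{flock, FlockArg};

fn try_lock_dir_exclusive(path: &Path) -> std::io::Result<File> {
    // directories can be opened read-only and flock()ed like regular files
    let file = File::open(path)?;

    flock(file.as_raw_fd(), FlockArg::LockExclusiveNonblock)
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::WouldBlock, err))?;

    // keep the returned handle alive for as long as the lock must be held
    Ok(file)
}
```

With the new datastore helpers, call sites shrink to e.g.
`datastore.lock_snapshot_shared(&backup_dir)?` instead of building the snapshot
path and calling lock_dir_noblock_shared() directly, as the hunks below show.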
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index d416c8d8..389ac298 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -15,7 +15,7 @@ use proxmox_sys::fs::{replace_file, file_read_optional_string, CreateOptions};
use proxmox_sys::process_locker::ProcessLockSharedGuard;
use proxmox_sys::WorkerTaskContext;
use proxmox_sys::{task_log, task_warn};
-use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard};
+use proxmox_sys::fs::DirLockGuard;
use pbs_api_types::{
UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
@@ -288,11 +288,9 @@ impl DataStore {
/// Remove a complete backup group including all snapshots, returns true
/// if all snapshots were removed, and false if some were protected
pub fn remove_backup_group(&self, backup_group: &BackupGroup) -> Result<bool, Error> {
-
+ let _guard = self.lock_group(backup_group)?;
let full_path = self.group_path(backup_group);
- let _guard = proxmox_sys::fs::lock_dir_noblock(&full_path, "backup group", "possible running backup")?;
-
log::info!("removing backup group {:?}", full_path);
let mut removed_all = true;
@@ -322,13 +320,11 @@ impl DataStore {
}
/// Remove a backup directory including all content
- pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
-
- let full_path = self.snapshot_path(backup_dir);
-
+ pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
let (_guard, _manifest_guard);
+
if !force {
- _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ _guard = self.lock_snapshot(backup_dir)?;
_manifest_guard = self.lock_manifest(backup_dir)?;
}
@@ -336,6 +332,8 @@ impl DataStore {
bail!("cannot remove protected snapshot");
}
+ let full_path = self.snapshot_path(backup_dir);
+
log::info!("removing backup snapshot {:?}", full_path);
std::fs::remove_dir_all(&full_path)
.map_err(|err| {
@@ -438,13 +436,13 @@ impl DataStore {
// create the last component now
match std::fs::create_dir(&full_path) {
Ok(_) => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
self.set_owner(backup_group, auth_id, false)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
@@ -462,8 +460,7 @@ impl DataStore {
let mut full_path = self.base_path();
full_path.push(&relative_path);
- let lock = ||
- lock_dir_noblock(&full_path, "snapshot", "internal error - tried creating snapshot that's already in use");
+ let lock = || self.lock_snapshot(backup_dir);
match std::fs::create_dir(&full_path) {
Ok(_) => Ok((relative_path, true, lock()?)),
@@ -883,9 +880,7 @@ impl DataStore {
backup_dir: &BackupDir,
protection: bool
) -> Result<(), Error> {
- let full_path = self.snapshot_path(backup_dir);
-
- let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ let _guard = self.lock_snapshot(backup_dir)?;
let protected_path = backup_dir.protected_file(self.base_path());
if protection {
@@ -952,4 +947,51 @@ impl DataStore {
Ok(chunk_list)
}
+
+ fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> Result<PathBuf, Error> {
+ let path = Path::new("/run/proxmox-backup/locks")
+ .join(self.name())
+ .join(backup_dir.relative_path());
+
+ std::fs::create_dir_all(&path)?;
+
+ Ok(path)
+ }
+
+ fn group_lock_path(&self, group: &BackupGroup) -> Result<PathBuf, Error> {
+ let path = Path::new("/run/proxmox-backup/locks")
+ .join(self.name())
+ .join(group.group_path());
+
+ std::fs::create_dir_all(&path)?;
+
+ Ok(path)
+ }
+
+ /// Lock a snapshot exclusively
+ pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock(
+ &self.snapshot_lock_path(backup_dir)?,
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Acquire a shared lock on a snapshot
+ pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock_shared(
+ &self.snapshot_lock_path(backup_dir)?,
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Lock a group exclusively
+ pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock(
+ &self.group_lock_path(group)?,
+ "backup group",
+ "another backup is already running",
+ )
+ }
}
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 1bbf57e7..30993b0d 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -6,8 +6,6 @@ use std::fs::File;
use anyhow::{bail, Error};
use nix::dir::Dir;
-use proxmox_sys::fs::lock_dir_noblock_shared;
-
use crate::backup_info::BackupDir;
use crate::index::IndexFile;
use crate::fixed_index::FixedIndexReader;
@@ -29,13 +27,7 @@ impl SnapshotReader {
/// Lock snapshot, reads the manifest and returns a new instance
pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
-
- let snapshot_path = datastore.snapshot_path(&snapshot);
-
- let locked_dir = lock_dir_noblock_shared(
- &snapshot_path,
- "snapshot",
- "locked by another operation")?;
+ let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
let datastore_name = datastore.name().to_string();
@@ -47,7 +39,7 @@ impl SnapshotReader {
}
};
- let mut client_log_path = snapshot_path;
+ let mut client_log_path = datastore.snapshot_path(&snapshot);
client_log_path.push(CLIENT_LOG_BLOB_NAME);
let mut file_list = Vec::new();
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 395edd3d..6e38f3da 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
BACKUP_ID_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA,
CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_BACKUP, BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
use pbs_tools::json::{required_array_param, required_integer_param, required_string_param};
use pbs_config::CachedUserInfo;
use pbs_datastore::{DataStore, PROXMOX_BACKUP_PROTOCOL_ID_V1};
@@ -157,8 +157,7 @@ async move {
}
// lock last snapshot to prevent forgetting/pruning it during backup
- let full_path = datastore.snapshot_path(&last.backup_dir);
- Some(lock_dir_noblock_shared(&full_path, "snapshot", "base snapshot is already locked by another operation")?)
+ Some(datastore.lock_snapshot_shared(&last.backup_dir)?)
} else {
None
};
diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs
index 2b11d1b1..f472686a 100644
--- a/src/api2/reader/mod.rs
+++ b/src/api2/reader/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_READ, PRIV_DATASTORE_BACKUP,
BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
use pbs_tools::json::{required_integer_param, required_string_param};
use pbs_datastore::{DataStore, PROXMOX_BACKUP_READER_PROTOCOL_ID_V1};
use pbs_datastore::backup_info::BackupDir;
@@ -114,10 +114,7 @@ fn upgrade_to_backup_reader_protocol(
}
}
- let _guard = lock_dir_noblock_shared(
- &datastore.snapshot_path(&backup_dir),
- "snapshot",
- "locked by another operation")?;
+ let _guard = datastore.lock_snapshot_shared(&backup_dir)?;
let path = datastore.base_path();
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 307d366c..eea0e15d 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -13,7 +13,6 @@ use pbs_datastore::{DataStore, DataBlob, StoreProgress};
use pbs_datastore::backup_info::{BackupGroup, BackupDir, BackupInfo};
use pbs_datastore::index::IndexFile;
use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, FileInfo};
-use proxmox_sys::fs::lock_dir_noblock_shared;
use crate::tools::parallel_handler::ParallelHandler;
@@ -300,11 +299,8 @@ pub fn verify_backup_dir(
upid: UPID,
filter: Option<&dyn Fn(&BackupManifest) -> bool>,
) -> Result<bool, Error> {
- let snap_lock = lock_dir_noblock_shared(
- &verify_worker.datastore.snapshot_path(backup_dir),
- "snapshot",
- "locked by another operation",
- );
+ let snap_lock = verify_worker.datastore.lock_snapshot_shared(&backup_dir);
+
match snap_lock {
Ok(snap_lock) => {
verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock)
--
2.30.2