* [PATCH proxmox-backup 4/4] datastore: fix sync level update propagation to chunk store
2026-05-08 12:29 [PATCH proxmox-backup 0/4] fix sync level updates for chunk store Christian Ebner
` (2 preceding siblings ...)
2026-05-08 12:29 ` [PATCH proxmox-backup 3/4] datastore: move try_ensure_sync_level() to DataStoreImpl Christian Ebner
@ 2026-05-08 12:29 ` Christian Ebner
3 siblings, 0 replies; 5+ messages in thread
From: Christian Ebner @ 2026-05-08 12:29 UTC (permalink / raw)
To: pbs-devel
Changing the datastore tuning options triggers an invalidation of the
datastore cache entry, leading to re-instantiation with the new
config parameters on the next datastore lookup.
Since commit 0bd9c8701 ("datastore: lookup: reuse ChunkStore on stale
datastore re-open") this does however not lead to re-creation of the
chunk store instance in order to avoid dropping the process locker,
which would lead to loosing any existing shared lock. However, as a
consequence the sync level is not update on the chunk store.
Fix this by:
- Storing the sync level as runtime properties of the chunk store as
state within the mutex syncing concurrent modify access. It is held
where needed anyways.
- Pass the mutex guard as additional parameters to the methods
requiring the locked state. This encodes the requirement for the
mutex guard directly into the function signature instead of
labeling it as unsafe only.
- Assuring the previous sync level on config changes for consistency.
Reported-by: Robert Obkircher <r.obkircher@proxmox.com>
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
pbs-datastore/src/chunk_store.rs | 59 +++++++++++--------
pbs-datastore/src/datastore.rs | 16 ++++-
.../src/local_datastore_lru_cache.rs | 12 ++--
3 files changed, 56 insertions(+), 31 deletions(-)
diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index 2888dea39..e2219276d 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -1,7 +1,7 @@
use std::os::unix::fs::MetadataExt;
use std::os::unix::io::AsRawFd;
use std::path::{Path, PathBuf};
-use std::sync::{Arc, Mutex};
+use std::sync::{Arc, Mutex, MutexGuard};
use std::time::Duration;
use anyhow::{bail, format_err, Context, Error};
@@ -27,14 +27,20 @@ use crate::{DataBlob, LocalDatastoreLruCache};
const USING_MARKER_FILENAME_EXT: &str = "using";
+#[derive(Default)]
+/// Configurable runtime properties of a chunk store
+pub(crate) struct ChunkStoreProperties {
+ pub(crate) sync_level: DatastoreFSyncLevel,
+}
+
/// File system based chunk store
pub struct ChunkStore {
name: String, // used for error reporting
pub(crate) base: PathBuf,
chunk_dir: PathBuf,
- mutex: Mutex<()>,
+ // Mutex to sync chunk store access, including property updates
+ mutex: Arc<Mutex<ChunkStoreProperties>>,
locker: Option<Arc<Mutex<ProcessLocker>>>,
- sync_level: DatastoreFSyncLevel,
}
// TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ?
@@ -79,9 +85,8 @@ impl ChunkStore {
name: String::new(),
base: PathBuf::new(),
chunk_dir: PathBuf::new(),
- mutex: Mutex::new(()),
+ mutex: Arc::new(Mutex::new(Default::default())),
locker: None,
- sync_level: Default::default(),
}
}
@@ -204,16 +209,19 @@ impl ChunkStore {
base,
chunk_dir,
locker: Some(locker),
- mutex: Mutex::new(()),
- sync_level,
+ mutex: Arc::new(Mutex::new(ChunkStoreProperties { sync_level })),
})
}
- fn touch_chunk_no_lock(&self, digest: &[u8; 32]) -> Result<(), Error> {
+ fn touch_chunk_no_lock(
+ &self,
+ digest: &[u8; 32],
+ mutex_guard: MutexGuard<ChunkStoreProperties>,
+ ) -> Result<(), Error> {
// unwrap: only `None` in unit tests
assert!(self.locker.is_some());
- self.cond_touch_chunk_no_lock(digest, true)?;
+ self.cond_touch_chunk_no_lock(digest, true, mutex_guard)?;
Ok(())
}
@@ -226,14 +234,15 @@ impl ChunkStore {
digest: &[u8; 32],
assert_exists: bool,
) -> Result<bool, Error> {
- let _lock = self.mutex.lock();
- self.cond_touch_chunk_no_lock(digest, assert_exists)
+ let lock = self.mutex.lock().unwrap();
+ self.cond_touch_chunk_no_lock(digest, assert_exists, lock)
}
fn cond_touch_chunk_no_lock(
&self,
digest: &[u8; 32],
assert_exists: bool,
+ _mutex_guard: MutexGuard<ChunkStoreProperties>,
) -> Result<bool, Error> {
// unwrap: only `None` in unit tests
assert!(self.locker.is_some());
@@ -423,8 +432,12 @@ impl ChunkStore {
ProcessLocker::oldest_shared_lock(self.locker.clone().unwrap())
}
- pub(crate) fn mutex(&self) -> &std::sync::Mutex<()> {
- &self.mutex
+ /// Mutex to lock chunk store for exclusive access.
+ ///
+ /// Must be held when modifying chunk store contents and allows to update
+ /// chunk store runtime properties.
+ pub(crate) fn mutex(&self) -> Arc<Mutex<ChunkStoreProperties>> {
+ Arc::clone(&self.mutex)
}
pub fn sweep_unused_chunks(
@@ -665,18 +678,17 @@ impl ChunkStore {
//println!("DIGEST {}", hex::encode(digest));
- let _lock = self.mutex.lock();
+ let lock = self.mutex.lock().unwrap();
- // Safety: lock acquired above
- unsafe { self.insert_chunk_nolock(chunk, digest, true) }
+ self.insert_chunk_nolock(chunk, digest, true, lock)
}
- /// Safety: requires holding the chunk store mutex!
- pub(crate) unsafe fn insert_chunk_nolock(
+ pub(crate) fn insert_chunk_nolock(
&self,
chunk: &DataBlob,
digest: &[u8; 32],
warn_on_overwrite_empty: bool,
+ mutex_guard: MutexGuard<ChunkStoreProperties>,
) -> Result<(bool, u64), Error> {
// unwrap: only `None` in unit tests
assert!(self.locker.is_some());
@@ -694,7 +706,7 @@ impl ChunkStore {
}
let old_size = metadata.len();
if encoded_size == old_size {
- self.touch_chunk_no_lock(digest)?;
+ self.touch_chunk_no_lock(digest, mutex_guard)?;
return Ok((true, old_size));
} else if old_size == 0 {
if warn_on_overwrite_empty {
@@ -721,11 +733,11 @@ impl ChunkStore {
// compressed, the size mismatch could be caused by different zstd versions
// so let's keep the one that was uploaded first, bit-rot is hopefully detected by
// verification at some point..
- self.touch_chunk_no_lock(digest)?;
+ self.touch_chunk_no_lock(digest, mutex_guard)?;
return Ok((true, old_size));
} else if old_size < encoded_size {
log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one.");
- self.touch_chunk_no_lock(digest)?;
+ self.touch_chunk_no_lock(digest, mutex_guard)?;
return Ok((true, old_size));
} else {
log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one.");
@@ -742,17 +754,18 @@ impl ChunkStore {
let gid = pbs_config::backup_group()?.gid;
create_options = create_options.owner(uid).group(gid);
}
+
proxmox_sys::fs::replace_file(
&chunk_path,
raw_data,
create_options,
- self.sync_level == DatastoreFSyncLevel::File,
+ mutex_guard.sync_level == DatastoreFSyncLevel::File,
)
.map_err(|err| {
format_err!("inserting chunk on store '{name}' failed for {digest_str} - {err}")
})?;
- if self.sync_level == DatastoreFSyncLevel::File {
+ if mutex_guard.sync_level == DatastoreFSyncLevel::File {
// fsync dir handle to persist the tmp rename
let dir = std::fs::File::open(chunk_dir_path)?;
nix::unistd::fsync(dir.as_raw_fd())
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 8f69dd7ac..379f797be 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -610,12 +610,22 @@ impl DataStore {
operation: Some(lookup.operation),
}));
}
+
+ datastore.try_ensure_sync_level()?;
+ let tuning: DatastoreTuning = serde_json::from_value(
+ DatastoreTuning::API_SCHEMA
+ .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
+ )?;
+ let mutex = datastore.chunk_store.mutex();
+ let mut mutex_guard = mutex.lock().unwrap();
+ mutex_guard.sync_level = tuning.sync_level.unwrap_or_default();
Arc::clone(&datastore.chunk_store)
} else {
let tuning: DatastoreTuning = serde_json::from_value(
DatastoreTuning::API_SCHEMA
.parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
)?;
+
Arc::new(ChunkStore::open(
lookup.name,
config.absolute_path(),
@@ -2499,7 +2509,8 @@ impl DataStore {
Ok(guard) => guard,
Err(_) => continue,
};
- let _guard = self.inner.chunk_store.mutex().lock().unwrap();
+ let mutex = self.inner.chunk_store.mutex();
+ let _guard = mutex.lock().unwrap();
// Check local markers (created or atime updated during phase1) and
// keep or delete chunk based on that.
@@ -3450,7 +3461,8 @@ impl DataStore {
.chunk_store
.lock_chunk(digest, CHUNK_LOCK_TIMEOUT)?;
}
- let _lock = self.inner.chunk_store.mutex().lock().unwrap();
+ let mutex = self.inner.chunk_store.mutex();
+ let _lock = mutex.lock().unwrap();
let (new_path, counter) = self.inner.chunk_store.next_bad_chunk_path(digest);
diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs
index ac27d4637..cf38d4a57 100644
--- a/pbs-datastore/src/local_datastore_lru_cache.rs
+++ b/pbs-datastore/src/local_datastore_lru_cache.rs
@@ -34,12 +34,11 @@ impl LocalDatastoreLruCache {
///
/// Fails if the chunk cannot be inserted successfully.
pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> {
- let _lock = self.store.mutex().lock().unwrap();
+ let shared_mutex = self.store.mutex();
+ let lock = shared_mutex.lock().unwrap();
+
+ self.store.insert_chunk_nolock(chunk, digest, false, lock)?;
- // Safety: lock acquire above
- unsafe {
- self.store.insert_chunk_nolock(chunk, digest, false)?;
- }
self.cache.insert(*digest, (), |digest| {
// Safety: lock acquired above, this is executed inline!
unsafe {
@@ -80,7 +79,8 @@ impl LocalDatastoreLruCache {
Ok(mut file) => match DataBlob::load_from_reader(&mut file) {
// File was still cached with contents, load response from file
Ok(chunk) => {
- let _lock = self.store.mutex().lock().unwrap();
+ let shared_mutex = self.store.mutex();
+ let _lock = shared_mutex.lock().unwrap();
self.cache.insert(*digest, (), |digest| {
// Safety: lock acquired above, this is executed inline
unsafe {
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread