From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id BD2F41FF13A for ; Wed, 13 May 2026 12:42:39 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CCC307EDE; Wed, 13 May 2026 12:42:38 +0200 (CEST) Message-ID: Date: Wed, 13 May 2026 12:42:29 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [PATCH proxmox-backup v3 7/7] datastore: fix sync level update propagation to chunk store To: Christian Ebner , pbs-devel@lists.proxmox.com References: <20260512131047.689089-1-c.ebner@proxmox.com> <20260512131047.689089-8-c.ebner@proxmox.com> Content-Language: en-US, de-AT From: Robert Obkircher In-Reply-To: <20260512131047.689089-8-c.ebner@proxmox.com> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778668837375 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.094 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_4 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [datastore.rs] Message-ID-Hash: FPC545KV5R6ROYQ4ICC4V54DYFG6SKYZ X-Message-ID-Hash: FPC545KV5R6ROYQ4ICC4V54DYFG6SKYZ X-MailFrom: r.obkircher@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On 12.05.26 15:09, Christian Ebner wrote: > Changing the datastore tuning options triggers an invalidation of the > datastore cache entry, leading to re-instantiation with the new > config parameters on the next datastore lookup. > Since commit 0bd9c8701 ("datastore: lookup: reuse ChunkStore on stale > datastore re-open") this does however not lead to re-creation of the > chunk store instance in order to avoid dropping the process locker, > which would lead to loosing any existing shared lock. However, as a > consequence the sync level is not updated on the chunk store. > > Fix this by: > - Storing the sync level as runtime properties of the chunk store as > state within the mutex syncing concurrent modify access. It is held > where needed anyways. > - Pass the mutex guard as additional parameters to the methods > requiring the locked state. This encodes the requirement for the > mutex guard directly into the function signature instead of > labeling it as unsafe only. > - Assuring the previous sync level on config changes for consistency. > To do this, extend and rename try_ensure_sync_level to also take the > mutex guard and update the sync level if given. > > Reported-by: Robert Obkircher > Signed-off-by: Christian Ebner > --- > pbs-datastore/src/chunk_store.rs | 73 +++++++++++++------ > pbs-datastore/src/datastore.rs | 26 ++++++- > .../src/local_datastore_lru_cache.rs | 12 +-- > 3 files changed, 78 insertions(+), 33 deletions(-) > > diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs > index e8c279a62..6c97e31d7 100644 > --- a/pbs-datastore/src/chunk_store.rs > +++ b/pbs-datastore/src/chunk_store.rs > @@ -1,7 +1,7 @@ > use std::os::unix::fs::MetadataExt; > use std::os::unix::io::AsRawFd; > use std::path::{Path, PathBuf}; > -use std::sync::{Arc, Mutex}; > +use std::sync::{Arc, Mutex, MutexGuard}; > use std::time::Duration; > > use anyhow::{bail, format_err, Context, Error}; > @@ -27,14 +27,20 @@ use crate::{DataBlob, LocalDatastoreLruCache}; > > const USING_MARKER_FILENAME_EXT: &str = "using"; > > +#[derive(Default)] > +/// Configurable runtime properties of a chunk store > +pub(crate) struct ChunkStoreProperties { > + pub(crate) sync_level: DatastoreFSyncLevel, > +} > + > /// File system based chunk store > pub struct ChunkStore { > name: String, // used for error reporting > pub(crate) base: PathBuf, > chunk_dir: PathBuf, > - mutex: Mutex<()>, > + // Mutex to sync chunk store access, including property updates > + mutex: Arc>, > locker: Option>>, > - sync_level: DatastoreFSyncLevel, > } > > // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ? > @@ -79,9 +85,8 @@ impl ChunkStore { > name: String::new(), > base: PathBuf::new(), > chunk_dir: PathBuf::new(), > - mutex: Mutex::new(()), > + mutex: Arc::new(Mutex::new(Default::default())), This doesn't need to be wrapped in an Arc anymore. > locker: None, > - sync_level: Default::default(), > } > } > > @@ -204,16 +209,19 @@ impl ChunkStore { > base, > chunk_dir, > locker: Some(locker), > - mutex: Mutex::new(()), > - sync_level, > + mutex: Arc::new(Mutex::new(ChunkStoreProperties { sync_level })), > }) > } > > - fn touch_chunk_no_lock(&self, digest: &[u8; 32]) -> Result<(), Error> { > + fn touch_chunk_no_lock( > + &self, > + digest: &[u8; 32], > + mutex_guard: MutexGuard, > + ) -> Result<(), Error> { > // unwrap: only `None` in unit tests > assert!(self.locker.is_some()); > > - self.cond_touch_chunk_no_lock(digest, true)?; > + self.cond_touch_chunk_no_lock(digest, true, mutex_guard)?; > Ok(()) > } > > @@ -226,14 +234,15 @@ impl ChunkStore { > digest: &[u8; 32], > assert_exists: bool, > ) -> Result { > - let _lock = self.mutex.lock(); > - self.cond_touch_chunk_no_lock(digest, assert_exists) > + let lock = self.mutex.lock().unwrap(); > + self.cond_touch_chunk_no_lock(digest, assert_exists, lock) > } > > fn cond_touch_chunk_no_lock( > &self, > digest: &[u8; 32], > assert_exists: bool, > + _mutex_guard: MutexGuard, > ) -> Result { > // unwrap: only `None` in unit tests > assert!(self.locker.is_some()); > @@ -423,7 +432,11 @@ impl ChunkStore { > ProcessLocker::oldest_shared_lock(self.locker.clone().unwrap()) > } > > - pub(crate) fn mutex(&self) -> &std::sync::Mutex<()> { > + /// Mutex to lock chunk store for exclusive access. > + /// > + /// Must be held when modifying chunk store contents and allows to update > + /// chunk store runtime properties. > + pub(crate) fn mutex(&self) -> &Mutex { > &self.mutex > } > > @@ -665,18 +678,17 @@ impl ChunkStore { > > //println!("DIGEST {}", hex::encode(digest)); > > - let _lock = self.mutex.lock(); > + let lock = self.mutex.lock().unwrap(); > > - // Safety: lock acquired above > - unsafe { self.insert_chunk_nolock(chunk, digest, true) } > + self.insert_chunk_nolock(chunk, digest, true, lock) > } > > - /// Safety: requires holding the chunk store mutex! > - pub(crate) unsafe fn insert_chunk_nolock( > + pub(crate) fn insert_chunk_nolock( > &self, > chunk: &DataBlob, > digest: &[u8; 32], > warn_on_overwrite_empty: bool, > + mutex_guard: MutexGuard, > ) -> Result<(bool, u64), Error> { > // unwrap: only `None` in unit tests > assert!(self.locker.is_some()); > @@ -694,7 +706,7 @@ impl ChunkStore { > } > let old_size = metadata.len(); > if encoded_size == old_size { > - self.touch_chunk_no_lock(digest)?; > + self.touch_chunk_no_lock(digest, mutex_guard)?; > return Ok((true, old_size)); > } else if old_size == 0 { > if warn_on_overwrite_empty { > @@ -721,11 +733,11 @@ impl ChunkStore { > // compressed, the size mismatch could be caused by different zstd versions > // so let's keep the one that was uploaded first, bit-rot is hopefully detected by > // verification at some point.. > - self.touch_chunk_no_lock(digest)?; > + self.touch_chunk_no_lock(digest, mutex_guard)?; > return Ok((true, old_size)); > } else if old_size < encoded_size { > log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one."); > - self.touch_chunk_no_lock(digest)?; > + self.touch_chunk_no_lock(digest, mutex_guard)?; > return Ok((true, old_size)); > } else { > log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one."); > @@ -742,17 +754,18 @@ impl ChunkStore { > let gid = pbs_config::backup_group()?.gid; > create_options = create_options.owner(uid).group(gid); > } > + > proxmox_sys::fs::replace_file( > &chunk_path, > raw_data, > create_options, > - self.sync_level == DatastoreFSyncLevel::File, > + mutex_guard.sync_level == DatastoreFSyncLevel::File, > ) > .map_err(|err| { > format_err!("inserting chunk on store '{name}' failed for {digest_str} - {err}") > })?; > > - if self.sync_level == DatastoreFSyncLevel::File { > + if mutex_guard.sync_level == DatastoreFSyncLevel::File { > // fsync dir handle to persist the tmp rename > let dir = std::fs::File::open(chunk_dir_path)?; > nix::unistd::fsync(dir.as_raw_fd()) > @@ -967,10 +980,22 @@ impl ChunkStore { > (chunk_path, counter) > } > > - pub(super) fn try_ensure_sync_level(&self) -> Result<(), Error> { > - if self.sync_level != DatastoreFSyncLevel::Filesystem { > + pub(super) fn try_ensure_sync_level_with_update( > + &self, > + mut mutex_guard: MutexGuard, > + update_sync_level: Option, > + ) -> Result<(), Error> { > + let assure_sync_level = mutex_guard.sync_level; > + if let Some(sync_level) = update_sync_level { > + mutex_guard.sync_level = sync_level; > + } > + > + drop(mutex_guard); // never hold during syncfs > + > + if assure_sync_level != DatastoreFSyncLevel::Filesystem { > return Ok(()); > } > + > let file = std::fs::File::open(self.base_path())?; > let fd = file.as_raw_fd(); > info!("syncing filesystem"); Unrelated, but it might make sense to include the path in the log message. > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index b67ab2f3a..6ec52d3f5 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -595,6 +595,20 @@ impl DataStore { > operation: Some(lookup.operation), > })); > } > + > + let tuning = pbs_config::datastore::parse_datastore_tuning_options(&config)?; > + let sync_level = tuning.sync_level.unwrap_or_default(); > + > + let mutex = datastore.chunk_store.mutex(); > + let mutex_guard = mutex.lock().unwrap(); > + if mutex_guard.sync_level != sync_level { > + datastore > + .chunk_store > + .try_ensure_sync_level_with_update(mutex_guard, Some(sync_level))?; > + } else { > + drop(mutex_guard); > + } > + > Arc::clone(&datastore.chunk_store) > } else { > let tuning = pbs_config::datastore::parse_datastore_tuning_options(&config)?; > @@ -2466,7 +2480,8 @@ impl DataStore { > Ok(guard) => guard, > Err(_) => continue, > }; > - let _guard = self.inner.chunk_store.mutex().lock().unwrap(); > + let mutex = self.inner.chunk_store.mutex(); > + let _guard = mutex.lock().unwrap(); This can be a one-liner again (also in other places). > > // Check local markers (created or atime updated during phase1) and > // keep or delete chunk based on that. > @@ -2995,7 +3010,11 @@ impl DataStore { > /// Syncs the filesystem of the chunk store base path if 'sync_level' is set to > /// [`DatastoreFSyncLevel::Filesystem`]. Uses syncfs(2). > pub fn try_ensure_sync_level(&self) -> Result<(), Error> { > - self.inner.chunk_store.try_ensure_sync_level() > + let mutex = self.inner.chunk_store.mutex(); > + let mutex_guard = mutex.lock().unwrap(); > + self.inner > + .chunk_store > + .try_ensure_sync_level_with_update(mutex_guard, None) > } > > /// Destroy a datastore. This requires that there are no active operations on the datastore. > @@ -3417,7 +3436,8 @@ impl DataStore { > .chunk_store > .lock_chunk(digest, CHUNK_LOCK_TIMEOUT)?; > } > - let _lock = self.inner.chunk_store.mutex().lock().unwrap(); > + let mutex = self.inner.chunk_store.mutex(); > + let _lock = mutex.lock().unwrap(); > > let (new_path, counter) = self.inner.chunk_store.next_bad_chunk_path(digest); > > diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs > index ac27d4637..cf38d4a57 100644 > --- a/pbs-datastore/src/local_datastore_lru_cache.rs > +++ b/pbs-datastore/src/local_datastore_lru_cache.rs > @@ -34,12 +34,11 @@ impl LocalDatastoreLruCache { > /// > /// Fails if the chunk cannot be inserted successfully. > pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> { > - let _lock = self.store.mutex().lock().unwrap(); > + let shared_mutex = self.store.mutex(); > + let lock = shared_mutex.lock().unwrap(); > + > + self.store.insert_chunk_nolock(chunk, digest, false, lock)?; This must pass the lock guard by reference, not by value. Otherwise the code below is no longer protected by the lock! > > - // Safety: lock acquire above > - unsafe { > - self.store.insert_chunk_nolock(chunk, digest, false)?; > - } > self.cache.insert(*digest, (), |digest| { > // Safety: lock acquired above, this is executed inline! > unsafe { > @@ -80,7 +79,8 @@ impl LocalDatastoreLruCache { > Ok(mut file) => match DataBlob::load_from_reader(&mut file) { > // File was still cached with contents, load response from file > Ok(chunk) => { > - let _lock = self.store.mutex().lock().unwrap(); > + let shared_mutex = self.store.mutex(); > + let _lock = shared_mutex.lock().unwrap(); > self.cache.insert(*digest, (), |digest| { > // Safety: lock acquired above, this is executed inline > unsafe {