From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 33E7C1FF133 for ; Mon, 11 May 2026 14:56:21 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id B4F1116249; Mon, 11 May 2026 14:56:20 +0200 (CEST) Message-ID: <0671c027-806a-4b12-b059-261ed33d4777@proxmox.com> Date: Mon, 11 May 2026 14:56:12 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [PATCH proxmox-backup 4/4] datastore: fix sync level update propagation to chunk store To: Robert Obkircher , pbs-devel@lists.proxmox.com References: <20260508122912.395304-1-c.ebner@proxmox.com> <20260508122912.395304-5-c.ebner@proxmox.com> <76a86dc3-56f5-42b8-ae83-91cd98f47c3f@proxmox.com> Content-Language: en-US, de-DE From: Christian Ebner In-Reply-To: <76a86dc3-56f5-42b8-ae83-91cd98f47c3f@proxmox.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778504061216 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.080 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_4 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: Z5DH2TT4VPBVRHLYTBMQF3YNR7ZQFP65 X-Message-ID-Hash: Z5DH2TT4VPBVRHLYTBMQF3YNR7ZQFP65 X-MailFrom: c.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On 5/11/26 11:23 AM, Robert Obkircher wrote: > > On 08.05.26 14:27, Christian Ebner wrote: >> Changing the datastore tuning options triggers an invalidation of the >> datastore cache entry, leading to re-instantiation with the new >> config parameters on the next datastore lookup. >> Since commit 0bd9c8701 ("datastore: lookup: reuse ChunkStore on stale >> datastore re-open") this does however not lead to re-creation of the >> chunk store instance in order to avoid dropping the process locker, >> which would lead to loosing any existing shared lock. However, as a >> consequence the sync level is not update on the chunk store. > typo: updated acked! >> >> Fix this by: >> - Storing the sync level as runtime properties of the chunk store as >> state within the mutex syncing concurrent modify access. It is held >> where needed anyways. >> - Pass the mutex guard as additional parameters to the methods >> requiring the locked state. This encodes the requirement for the >> mutex guard directly into the function signature instead of >> labeling it as unsafe only. >> - Assuring the previous sync level on config changes for consistency. >> >> Reported-by: Robert Obkircher >> Signed-off-by: Christian Ebner >> --- >> pbs-datastore/src/chunk_store.rs | 59 +++++++++++-------- >> pbs-datastore/src/datastore.rs | 16 ++++- >> .../src/local_datastore_lru_cache.rs | 12 ++-- >> 3 files changed, 56 insertions(+), 31 deletions(-) >> >> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs >> index 2888dea39..e2219276d 100644 >> --- a/pbs-datastore/src/chunk_store.rs >> +++ b/pbs-datastore/src/chunk_store.rs >> @@ -1,7 +1,7 @@ >> use std::os::unix::fs::MetadataExt; >> use std::os::unix::io::AsRawFd; >> use std::path::{Path, PathBuf}; >> -use std::sync::{Arc, Mutex}; >> +use std::sync::{Arc, Mutex, MutexGuard}; >> use std::time::Duration; >> >> use anyhow::{bail, format_err, Context, Error}; >> @@ -27,14 +27,20 @@ use crate::{DataBlob, LocalDatastoreLruCache}; >> >> const USING_MARKER_FILENAME_EXT: &str = "using"; >> >> +#[derive(Default)] >> +/// Configurable runtime properties of a chunk store >> +pub(crate) struct ChunkStoreProperties { >> + pub(crate) sync_level: DatastoreFSyncLevel, >> +} >> + >> /// File system based chunk store >> pub struct ChunkStore { >> name: String, // used for error reporting >> pub(crate) base: PathBuf, >> chunk_dir: PathBuf, >> - mutex: Mutex<()>, >> + // Mutex to sync chunk store access, including property updates >> + mutex: Arc>, >> locker: Option>>, >> - sync_level: DatastoreFSyncLevel, >> } >> >> // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ? >> @@ -79,9 +85,8 @@ impl ChunkStore { >> name: String::new(), >> base: PathBuf::new(), >> chunk_dir: PathBuf::new(), >> - mutex: Mutex::new(()), >> + mutex: Arc::new(Mutex::new(Default::default())), >> locker: None, >> - sync_level: Default::default(), >> } >> } >> >> @@ -204,16 +209,19 @@ impl ChunkStore { >> base, >> chunk_dir, >> locker: Some(locker), >> - mutex: Mutex::new(()), >> - sync_level, >> + mutex: Arc::new(Mutex::new(ChunkStoreProperties { sync_level })), >> }) >> } >> >> - fn touch_chunk_no_lock(&self, digest: &[u8; 32]) -> Result<(), Error> { >> + fn touch_chunk_no_lock( >> + &self, >> + digest: &[u8; 32], >> + mutex_guard: MutexGuard, >> + ) -> Result<(), Error> { >> // unwrap: only `None` in unit tests >> assert!(self.locker.is_some()); >> >> - self.cond_touch_chunk_no_lock(digest, true)?; >> + self.cond_touch_chunk_no_lock(digest, true, mutex_guard)?; >> Ok(()) >> } >> >> @@ -226,14 +234,15 @@ impl ChunkStore { >> digest: &[u8; 32], >> assert_exists: bool, >> ) -> Result { >> - let _lock = self.mutex.lock(); >> - self.cond_touch_chunk_no_lock(digest, assert_exists) >> + let lock = self.mutex.lock().unwrap(); >> + self.cond_touch_chunk_no_lock(digest, assert_exists, lock) >> } >> >> fn cond_touch_chunk_no_lock( >> &self, >> digest: &[u8; 32], >> assert_exists: bool, >> + _mutex_guard: MutexGuard, >> ) -> Result { >> // unwrap: only `None` in unit tests >> assert!(self.locker.is_some()); >> @@ -423,8 +432,12 @@ impl ChunkStore { >> ProcessLocker::oldest_shared_lock(self.locker.clone().unwrap()) >> } >> >> - pub(crate) fn mutex(&self) -> &std::sync::Mutex<()> { >> - &self.mutex >> + /// Mutex to lock chunk store for exclusive access. >> + /// >> + /// Must be held when modifying chunk store contents and allows to update >> + /// chunk store runtime properties. >> + pub(crate) fn mutex(&self) -> Arc> { >> + Arc::clone(&self.mutex) > It's a bit unfortunate if we have to clone the Arc every time. > > It looks like the methods that require the mutex guard all take self > by shared reference, so it should be fine to keep returning that here > as well, no? Yes, should be avoided. Will fix for v2 > >> } >> >> pub fn sweep_unused_chunks( >> @@ -665,18 +678,17 @@ impl ChunkStore { >> >> //println!("DIGEST {}", hex::encode(digest)); >> >> - let _lock = self.mutex.lock(); >> + let lock = self.mutex.lock().unwrap(); >> >> - // Safety: lock acquired above >> - unsafe { self.insert_chunk_nolock(chunk, digest, true) } >> + self.insert_chunk_nolock(chunk, digest, true, lock) >> } >> >> - /// Safety: requires holding the chunk store mutex! >> - pub(crate) unsafe fn insert_chunk_nolock( >> + pub(crate) fn insert_chunk_nolock( >> &self, >> chunk: &DataBlob, >> digest: &[u8; 32], >> warn_on_overwrite_empty: bool, >> + mutex_guard: MutexGuard, >> ) -> Result<(bool, u64), Error> { >> // unwrap: only `None` in unit tests >> assert!(self.locker.is_some()); >> @@ -694,7 +706,7 @@ impl ChunkStore { >> } >> let old_size = metadata.len(); >> if encoded_size == old_size { >> - self.touch_chunk_no_lock(digest)?; >> + self.touch_chunk_no_lock(digest, mutex_guard)?; >> return Ok((true, old_size)); >> } else if old_size == 0 { >> if warn_on_overwrite_empty { >> @@ -721,11 +733,11 @@ impl ChunkStore { >> // compressed, the size mismatch could be caused by different zstd versions >> // so let's keep the one that was uploaded first, bit-rot is hopefully detected by >> // verification at some point.. >> - self.touch_chunk_no_lock(digest)?; >> + self.touch_chunk_no_lock(digest, mutex_guard)?; >> return Ok((true, old_size)); >> } else if old_size < encoded_size { >> log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one."); >> - self.touch_chunk_no_lock(digest)?; >> + self.touch_chunk_no_lock(digest, mutex_guard)?; >> return Ok((true, old_size)); >> } else { >> log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one."); >> @@ -742,17 +754,18 @@ impl ChunkStore { >> let gid = pbs_config::backup_group()?.gid; >> create_options = create_options.owner(uid).group(gid); >> } >> + >> proxmox_sys::fs::replace_file( >> &chunk_path, >> raw_data, >> create_options, >> - self.sync_level == DatastoreFSyncLevel::File, >> + mutex_guard.sync_level == DatastoreFSyncLevel::File, >> ) >> .map_err(|err| { >> format_err!("inserting chunk on store '{name}' failed for {digest_str} - {err}") >> })?; >> >> - if self.sync_level == DatastoreFSyncLevel::File { >> + if mutex_guard.sync_level == DatastoreFSyncLevel::File { >> // fsync dir handle to persist the tmp rename >> let dir = std::fs::File::open(chunk_dir_path)?; >> nix::unistd::fsync(dir.as_raw_fd()) >> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs >> index 8f69dd7ac..379f797be 100644 >> --- a/pbs-datastore/src/datastore.rs >> +++ b/pbs-datastore/src/datastore.rs >> @@ -610,12 +610,22 @@ impl DataStore { >> operation: Some(lookup.operation), >> })); >> } >> + >> + datastore.try_ensure_sync_level()?; > This should only be called if the level actually changes. > > There is also a small race condition with try_ensure_sync_level. It > could be solved by locking the mutex in both cases. Yes, will fix, thanks! >> + let tuning: DatastoreTuning = serde_json::from_value( >> + DatastoreTuning::API_SCHEMA >> + .parse_property_string(config.tuning.as_deref().unwrap_or(""))?, >> + )?; >> + let mutex = datastore.chunk_store.mutex(); >> + let mut mutex_guard = mutex.lock().unwrap(); >> + mutex_guard.sync_level = tuning.sync_level.unwrap_or_default(); >> Arc::clone(&datastore.chunk_store) >> } else { >> let tuning: DatastoreTuning = serde_json::from_value( >> DatastoreTuning::API_SCHEMA >> .parse_property_string(config.tuning.as_deref().unwrap_or(""))?, >> )?; >> + >> Arc::new(ChunkStore::open( >> lookup.name, >> config.absolute_path(), >> @@ -2499,7 +2509,8 @@ impl DataStore { >> Ok(guard) => guard, >> Err(_) => continue, >> }; >> - let _guard = self.inner.chunk_store.mutex().lock().unwrap(); >> + let mutex = self.inner.chunk_store.mutex(); >> + let _guard = mutex.lock().unwrap(); >> >> // Check local markers (created or atime updated during phase1) and >> // keep or delete chunk based on that. >> @@ -3450,7 +3461,8 @@ impl DataStore { >> .chunk_store >> .lock_chunk(digest, CHUNK_LOCK_TIMEOUT)?; >> } >> - let _lock = self.inner.chunk_store.mutex().lock().unwrap(); >> + let mutex = self.inner.chunk_store.mutex(); >> + let _lock = mutex.lock().unwrap(); >> >> let (new_path, counter) = self.inner.chunk_store.next_bad_chunk_path(digest); >> >> diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs >> index ac27d4637..cf38d4a57 100644 >> --- a/pbs-datastore/src/local_datastore_lru_cache.rs >> +++ b/pbs-datastore/src/local_datastore_lru_cache.rs >> @@ -34,12 +34,11 @@ impl LocalDatastoreLruCache { >> /// >> /// Fails if the chunk cannot be inserted successfully. >> pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> { >> - let _lock = self.store.mutex().lock().unwrap(); >> + let shared_mutex = self.store.mutex(); >> + let lock = shared_mutex.lock().unwrap(); >> + >> + self.store.insert_chunk_nolock(chunk, digest, false, lock)?; >> >> - // Safety: lock acquire above >> - unsafe { >> - self.store.insert_chunk_nolock(chunk, digest, false)?; >> - } >> self.cache.insert(*digest, (), |digest| { >> // Safety: lock acquired above, this is executed inline! >> unsafe { >> @@ -80,7 +79,8 @@ impl LocalDatastoreLruCache { >> Ok(mut file) => match DataBlob::load_from_reader(&mut file) { >> // File was still cached with contents, load response from file >> Ok(chunk) => { >> - let _lock = self.store.mutex().lock().unwrap(); >> + let shared_mutex = self.store.mutex(); >> + let _lock = shared_mutex.lock().unwrap(); >> self.cache.insert(*digest, (), |digest| { >> // Safety: lock acquired above, this is executed inline >> unsafe {