From: Christian Ebner <c.ebner@proxmox.com>
To: pbs-devel@lists.proxmox.com
Subject: [PATCH proxmox-backup 4/4] datastore: fix sync level update propagation to chunk store
Date: Fri, 8 May 2026 14:29:12 +0200
Message-ID: <20260508122912.395304-5-c.ebner@proxmox.com>
In-Reply-To: <20260508122912.395304-1-c.ebner@proxmox.com>
References: <20260508122912.395304-1-c.ebner@proxmox.com>
List-Id: Proxmox Backup Server development discussion

Changing the datastore tuning options invalidates the datastore cache
entry, leading to re-instantiation with the new config parameters on
the next datastore lookup. Since commit 0bd9c8701 ("datastore: lookup:
reuse ChunkStore on stale datastore re-open") this no longer re-creates
the chunk store instance, in order to avoid dropping the process
locker, which would lose any existing shared lock. As a consequence,
however, the sync level is not updated on the chunk store.

Fix this by:
- Storing the sync level as a runtime property of the chunk store,
  i.e. as state guarded by the mutex that synchronizes concurrent
  modifying access. That mutex is held where needed anyway.
- Passing the mutex guard as an additional parameter to the methods
  requiring the locked state. This encodes the mutex guard requirement
  directly into the function signature instead of merely labeling the
  method as unsafe (see the sketch below).
- Ensuring the previous sync level on config changes, for consistency.
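To illustrate the guard-passing pattern from the second point above,
here is a minimal, self-contained sketch; it is not the patched code,
and `Store`, `Properties` and `insert_nolock` are hypothetical
stand-ins for `ChunkStore`, `ChunkStoreProperties` and
`insert_chunk_nolock`:

use std::sync::{Mutex, MutexGuard};

#[derive(Default)]
struct Properties {
    // stand-in for runtime-tunable state such as the sync level
    file_sync: bool,
}

struct Store {
    mutex: Mutex<Properties>,
}

impl Store {
    // Taking the guard by value makes "lock must be held" a
    // compile-time requirement instead of an `unsafe` contract
    // in a doc comment.
    fn insert_nolock(&self, guard: MutexGuard<Properties>) {
        let _file_sync = guard.file_sync; // read locked runtime properties
        // ... perform the insert while the lock is held ...
    } // guard dropped here, releasing the lock

    fn insert(&self) {
        let guard = self.mutex.lock().unwrap();
        self.insert_nolock(guard);
    }
}

fn main() {
    let store = Store {
        mutex: Mutex::new(Properties::default()),
    };
    store.insert();
}

Passing the guard by value also means the helper consumes it, so the
lock is released exactly when the no-lock helper returns.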
Reported-by: Robert Obkircher
Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
---
 pbs-datastore/src/chunk_store.rs              | 59 +++++++++++--------
 pbs-datastore/src/datastore.rs                | 16 ++++-
 .../src/local_datastore_lru_cache.rs          | 12 ++--
 3 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index 2888dea39..e2219276d 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -1,7 +1,7 @@
 use std::os::unix::fs::MetadataExt;
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
-use std::sync::{Arc, Mutex};
+use std::sync::{Arc, Mutex, MutexGuard};
 use std::time::Duration;
 
 use anyhow::{bail, format_err, Context, Error};
@@ -27,14 +27,20 @@ use crate::{DataBlob, LocalDatastoreLruCache};
 
 const USING_MARKER_FILENAME_EXT: &str = "using";
 
+#[derive(Default)]
+/// Configurable runtime properties of a chunk store
+pub(crate) struct ChunkStoreProperties {
+    pub(crate) sync_level: DatastoreFSyncLevel,
+}
+
 /// File system based chunk store
 pub struct ChunkStore {
     name: String, // used for error reporting
     pub(crate) base: PathBuf,
     chunk_dir: PathBuf,
-    mutex: Mutex<()>,
+    // Mutex to sync chunk store access, including property updates
+    mutex: Arc<Mutex<ChunkStoreProperties>>,
     locker: Option<Arc<Mutex<ProcessLocker>>>,
-    sync_level: DatastoreFSyncLevel,
 }
 
 // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ?
@@ -79,9 +85,8 @@ impl ChunkStore {
             name: String::new(),
             base: PathBuf::new(),
             chunk_dir: PathBuf::new(),
-            mutex: Mutex::new(()),
+            mutex: Arc::new(Mutex::new(Default::default())),
             locker: None,
-            sync_level: Default::default(),
         }
     }
 
@@ -204,16 +209,19 @@ impl ChunkStore {
             base,
             chunk_dir,
             locker: Some(locker),
-            mutex: Mutex::new(()),
-            sync_level,
+            mutex: Arc::new(Mutex::new(ChunkStoreProperties { sync_level })),
         })
     }
 
-    fn touch_chunk_no_lock(&self, digest: &[u8; 32]) -> Result<(), Error> {
+    fn touch_chunk_no_lock(
+        &self,
+        digest: &[u8; 32],
+        mutex_guard: MutexGuard<ChunkStoreProperties>,
+    ) -> Result<(), Error> {
         // unwrap: only `None` in unit tests
         assert!(self.locker.is_some());
 
-        self.cond_touch_chunk_no_lock(digest, true)?;
+        self.cond_touch_chunk_no_lock(digest, true, mutex_guard)?;
 
         Ok(())
     }
@@ -226,14 +234,15 @@ impl ChunkStore {
         digest: &[u8; 32],
         assert_exists: bool,
     ) -> Result<bool, Error> {
-        let _lock = self.mutex.lock();
-        self.cond_touch_chunk_no_lock(digest, assert_exists)
+        let lock = self.mutex.lock().unwrap();
+        self.cond_touch_chunk_no_lock(digest, assert_exists, lock)
     }
 
     fn cond_touch_chunk_no_lock(
         &self,
         digest: &[u8; 32],
         assert_exists: bool,
+        _mutex_guard: MutexGuard<ChunkStoreProperties>,
     ) -> Result<bool, Error> {
         // unwrap: only `None` in unit tests
         assert!(self.locker.is_some());
@@ -423,8 +432,12 @@ impl ChunkStore {
         ProcessLocker::oldest_shared_lock(self.locker.clone().unwrap())
     }
 
-    pub(crate) fn mutex(&self) -> &std::sync::Mutex<()> {
-        &self.mutex
+    /// Mutex to lock chunk store for exclusive access.
+    ///
+    /// Must be held when modifying chunk store contents and allows to update
+    /// chunk store runtime properties.
+    pub(crate) fn mutex(&self) -> Arc<Mutex<ChunkStoreProperties>> {
+        Arc::clone(&self.mutex)
     }
 
     pub fn sweep_unused_chunks(
@@ -665,18 +678,17 @@ impl ChunkStore {
 
         //println!("DIGEST {}", hex::encode(digest));
 
-        let _lock = self.mutex.lock();
+        let lock = self.mutex.lock().unwrap();
 
-        // Safety: lock acquired above
-        unsafe { self.insert_chunk_nolock(chunk, digest, true) }
+        self.insert_chunk_nolock(chunk, digest, true, lock)
     }
 
-    /// Safety: requires holding the chunk store mutex!
-    pub(crate) unsafe fn insert_chunk_nolock(
+    pub(crate) fn insert_chunk_nolock(
         &self,
         chunk: &DataBlob,
         digest: &[u8; 32],
         warn_on_overwrite_empty: bool,
+        mutex_guard: MutexGuard<ChunkStoreProperties>,
     ) -> Result<(bool, u64), Error> {
         // unwrap: only `None` in unit tests
         assert!(self.locker.is_some());
@@ -694,7 +706,7 @@ impl ChunkStore {
             }
             let old_size = metadata.len();
             if encoded_size == old_size {
-                self.touch_chunk_no_lock(digest)?;
+                self.touch_chunk_no_lock(digest, mutex_guard)?;
                 return Ok((true, old_size));
             } else if old_size == 0 {
                 if warn_on_overwrite_empty {
@@ -721,11 +733,11 @@ impl ChunkStore {
                 // compressed, the size mismatch could be caused by different zstd versions
                 // so let's keep the one that was uploaded first, bit-rot is hopefully detected by
                 // verification at some point..
-                self.touch_chunk_no_lock(digest)?;
+                self.touch_chunk_no_lock(digest, mutex_guard)?;
                 return Ok((true, old_size));
             } else if old_size < encoded_size {
                 log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one.");
-                self.touch_chunk_no_lock(digest)?;
+                self.touch_chunk_no_lock(digest, mutex_guard)?;
                 return Ok((true, old_size));
             } else {
                 log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one.");
@@ -742,17 +754,18 @@ impl ChunkStore {
             let gid = pbs_config::backup_group()?.gid;
             create_options = create_options.owner(uid).group(gid);
         }
+
         proxmox_sys::fs::replace_file(
             &chunk_path,
             raw_data,
             create_options,
-            self.sync_level == DatastoreFSyncLevel::File,
+            mutex_guard.sync_level == DatastoreFSyncLevel::File,
         )
         .map_err(|err| {
             format_err!("inserting chunk on store '{name}' failed for {digest_str} - {err}")
         })?;
 
-        if self.sync_level == DatastoreFSyncLevel::File {
+        if mutex_guard.sync_level == DatastoreFSyncLevel::File {
             // fsync dir handle to persist the tmp rename
             let dir = std::fs::File::open(chunk_dir_path)?;
             nix::unistd::fsync(dir.as_raw_fd())
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 8f69dd7ac..379f797be 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -610,12 +610,22 @@ impl DataStore {
                     operation: Some(lookup.operation),
                 }));
             }
+
+            datastore.try_ensure_sync_level()?;
+            let tuning: DatastoreTuning = serde_json::from_value(
+                DatastoreTuning::API_SCHEMA
+                    .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
+            )?;
+            let mutex = datastore.chunk_store.mutex();
+            let mut mutex_guard = mutex.lock().unwrap();
+            mutex_guard.sync_level = tuning.sync_level.unwrap_or_default();
             Arc::clone(&datastore.chunk_store)
         } else {
             let tuning: DatastoreTuning = serde_json::from_value(
                 DatastoreTuning::API_SCHEMA
                     .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
             )?;
+
             Arc::new(ChunkStore::open(
                 lookup.name,
                 config.absolute_path(),
@@ -2499,7 +2509,8 @@ impl DataStore {
                 Ok(guard) => guard,
                 Err(_) => continue,
             };
-            let _guard = self.inner.chunk_store.mutex().lock().unwrap();
+            let mutex = self.inner.chunk_store.mutex();
+            let _guard = mutex.lock().unwrap();
 
             // Check local markers (created or atime updated during phase1) and
             // keep or delete chunk based on that.
@@ -3450,7 +3461,8 @@ impl DataStore {
                 .chunk_store
                 .lock_chunk(digest, CHUNK_LOCK_TIMEOUT)?;
         }
-        let _lock = self.inner.chunk_store.mutex().lock().unwrap();
+        let mutex = self.inner.chunk_store.mutex();
+        let _lock = mutex.lock().unwrap();
 
         let (new_path, counter) = self.inner.chunk_store.next_bad_chunk_path(digest);
 
diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs
index ac27d4637..cf38d4a57 100644
--- a/pbs-datastore/src/local_datastore_lru_cache.rs
+++ b/pbs-datastore/src/local_datastore_lru_cache.rs
@@ -34,12 +34,11 @@ impl LocalDatastoreLruCache {
     ///
     /// Fails if the chunk cannot be inserted successfully.
     pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> {
-        let _lock = self.store.mutex().lock().unwrap();
+        let shared_mutex = self.store.mutex();
+        let lock = shared_mutex.lock().unwrap();
+
+        self.store.insert_chunk_nolock(chunk, digest, false, lock)?;
 
-        // Safety: lock acquire above
-        unsafe {
-            self.store.insert_chunk_nolock(chunk, digest, false)?;
-        }
         self.cache.insert(*digest, (), |digest| {
             // Safety: lock acquired above, this is executed inline!
             unsafe {
@@ -80,7 +79,8 @@ impl LocalDatastoreLruCache {
             Ok(mut file) => match DataBlob::load_from_reader(&mut file) {
                 // File was still cached with contents, load response from file
                 Ok(chunk) => {
-                    let _lock = self.store.mutex().lock().unwrap();
+                    let shared_mutex = self.store.mutex();
+                    let _lock = shared_mutex.lock().unwrap();
                     self.cache.insert(*digest, (), |digest| {
                         // Safety: lock acquired above, this is executed inline
                         unsafe {
-- 
2.47.3
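A note on the updated call sites: `mutex()` now returns an owned
`Arc<Mutex<ChunkStoreProperties>>`, which is why the datastore and
cache code binds it to a local (`mutex`/`shared_mutex`) before
locking. Locking the temporary directly would be rejected by the
borrow checker, since the guard would outlive the `Arc` it borrows
from. A minimal standalone sketch of that constraint (names like
`mutex_by_value` are made up for illustration):

use std::sync::{Arc, Mutex};

// Mirrors ChunkStore::mutex() handing out a clone of the shared Arc.
fn mutex_by_value(shared: &Arc<Mutex<u32>>) -> Arc<Mutex<u32>> {
    Arc::clone(shared)
}

fn main() {
    let shared = Arc::new(Mutex::new(0u32));

    // Does not compile: the temporary Arc returned by mutex_by_value()
    // is dropped at the end of the statement while the guard still
    // borrows from it (E0716).
    // let _guard = mutex_by_value(&shared).lock().unwrap();

    // Binding the Arc to a local first keeps it alive as long as the guard.
    let mutex = mutex_by_value(&shared);
    let _guard = mutex.lock().unwrap();
}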