From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 6A3721FF13C for ; Thu, 14 May 2026 10:06:13 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 48EAD327CC; Thu, 14 May 2026 10:06:13 +0200 (CEST) From: Christian Ebner To: pbs-devel@lists.proxmox.com Subject: [PATCH proxmox-backup v4 7/8] datastore: fix sync level update propagation to chunk store Date: Thu, 14 May 2026 10:04:56 +0200 Message-ID: <20260514080458.5677-8-c.ebner@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260514080458.5677-1-c.ebner@proxmox.com> References: <20260514080458.5677-1-c.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1778745921165 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.081 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_2 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_4 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: DSMADRBPEDOCYNHBGEL325TT7GP7WGP3 X-Message-ID-Hash: DSMADRBPEDOCYNHBGEL325TT7GP7WGP3 X-MailFrom: c.ebner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Backup Server development discussion List-Help: List-Owner: List-Post: List-Subscribe: 
List-Unsubscribe: Changing the datastore tuning options triggers an invalidation of the datastore cache entry, leading to re-instantiation with the new config parameters on the next datastore lookup. Since commit 0bd9c8701 ("datastore: lookup: reuse ChunkStore on stale datastore re-open") this does however not lead to re-creation of the chunk store instance in order to avoid dropping the process locker, which would lead to losing any existing shared lock. However, as a consequence the sync level is not updated on the chunk store. Fix this by: - Storing the sync level as runtime properties of the chunk store as state within the mutex syncing concurrent modify access. It is held where needed anyways. - Pass the mutex guard as additional parameters to the methods requiring the locked state. This encodes the requirement for the mutex guard directly into the function signature instead of labeling it as unsafe only. - Assuring the previous sync level on config changes for consistency. To do this, extend and rename try_ensure_sync_level to also take the mutex guard and update the sync level if given. 
Reported-by: Robert Obkircher Signed-off-by: Christian Ebner --- pbs-datastore/src/chunk_store.rs | 72 ++++++++++++------- pbs-datastore/src/datastore.rs | 18 ++++- .../src/local_datastore_lru_cache.rs | 9 ++- 3 files changed, 69 insertions(+), 30 deletions(-) diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index e8c279a62..96deb840d 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -1,7 +1,7 @@ use std::os::unix::fs::MetadataExt; use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, MutexGuard}; use std::time::Duration; use anyhow::{bail, format_err, Context, Error}; @@ -27,14 +27,20 @@ use crate::{DataBlob, LocalDatastoreLruCache}; const USING_MARKER_FILENAME_EXT: &str = "using"; +#[derive(Default)] +/// Configurable runtime properties of a chunk store +pub(crate) struct ChunkStoreProperties { + pub(crate) sync_level: DatastoreFSyncLevel, +} + /// File system based chunk store pub struct ChunkStore { name: String, // used for error reporting pub(crate) base: PathBuf, chunk_dir: PathBuf, - mutex: Mutex<()>, + // Mutex to sync chunk store access, including property updates + mutex: Mutex, locker: Option>>, - sync_level: DatastoreFSyncLevel, } // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ? 
@@ -79,9 +85,8 @@ impl ChunkStore { name: String::new(), base: PathBuf::new(), chunk_dir: PathBuf::new(), - mutex: Mutex::new(()), + mutex: Mutex::new(Default::default()), locker: None, - sync_level: Default::default(), } } @@ -204,16 +209,19 @@ impl ChunkStore { base, chunk_dir, locker: Some(locker), - mutex: Mutex::new(()), - sync_level, + mutex: Mutex::new(ChunkStoreProperties { sync_level }), }) } - fn touch_chunk_no_lock(&self, digest: &[u8; 32]) -> Result<(), Error> { + fn touch_chunk_no_lock( + &self, + digest: &[u8; 32], + mutex_guard: &MutexGuard, + ) -> Result<(), Error> { // unwrap: only `None` in unit tests assert!(self.locker.is_some()); - self.cond_touch_chunk_no_lock(digest, true)?; + self.cond_touch_chunk_no_lock(digest, true, mutex_guard)?; Ok(()) } @@ -226,14 +234,15 @@ impl ChunkStore { digest: &[u8; 32], assert_exists: bool, ) -> Result { - let _lock = self.mutex.lock(); - self.cond_touch_chunk_no_lock(digest, assert_exists) + let lock = self.mutex.lock().unwrap(); + self.cond_touch_chunk_no_lock(digest, assert_exists, &lock) } fn cond_touch_chunk_no_lock( &self, digest: &[u8; 32], assert_exists: bool, + _mutex_guard: &MutexGuard, ) -> Result { // unwrap: only `None` in unit tests assert!(self.locker.is_some()); @@ -423,7 +432,11 @@ impl ChunkStore { ProcessLocker::oldest_shared_lock(self.locker.clone().unwrap()) } - pub(crate) fn mutex(&self) -> &std::sync::Mutex<()> { + /// Mutex to lock chunk store for exclusive access. + /// + /// Must be held when modifying chunk store contents and allows to update + /// chunk store runtime properties. 
+ pub(crate) fn mutex(&self) -> &Mutex { &self.mutex } @@ -665,18 +678,17 @@ impl ChunkStore { //println!("DIGEST {}", hex::encode(digest)); - let _lock = self.mutex.lock(); + let lock = self.mutex.lock().unwrap(); - // Safety: lock acquired above - unsafe { self.insert_chunk_nolock(chunk, digest, true) } + self.insert_chunk_nolock(chunk, digest, true, &lock) } - /// Safety: requires holding the chunk store mutex! - pub(crate) unsafe fn insert_chunk_nolock( + pub(crate) fn insert_chunk_nolock( &self, chunk: &DataBlob, digest: &[u8; 32], warn_on_overwrite_empty: bool, + mutex_guard: &MutexGuard, ) -> Result<(bool, u64), Error> { // unwrap: only `None` in unit tests assert!(self.locker.is_some()); @@ -694,7 +706,7 @@ impl ChunkStore { } let old_size = metadata.len(); if encoded_size == old_size { - self.touch_chunk_no_lock(digest)?; + self.touch_chunk_no_lock(digest, mutex_guard)?; return Ok((true, old_size)); } else if old_size == 0 { if warn_on_overwrite_empty { @@ -721,11 +733,11 @@ impl ChunkStore { // compressed, the size mismatch could be caused by different zstd versions // so let's keep the one that was uploaded first, bit-rot is hopefully detected by // verification at some point.. 
- self.touch_chunk_no_lock(digest)?; + self.touch_chunk_no_lock(digest, mutex_guard)?; return Ok((true, old_size)); } else if old_size < encoded_size { log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is smaller, discarding uploaded one."); - self.touch_chunk_no_lock(digest)?; + self.touch_chunk_no_lock(digest, mutex_guard)?; return Ok((true, old_size)); } else { log::debug!("Got another copy of chunk with digest '{digest_str}', existing chunk is bigger, replacing with uploaded one."); @@ -746,13 +758,13 @@ impl ChunkStore { &chunk_path, raw_data, create_options, - self.sync_level == DatastoreFSyncLevel::File, + mutex_guard.sync_level == DatastoreFSyncLevel::File, ) .map_err(|err| { format_err!("inserting chunk on store '{name}' failed for {digest_str} - {err}") })?; - if self.sync_level == DatastoreFSyncLevel::File { + if mutex_guard.sync_level == DatastoreFSyncLevel::File { // fsync dir handle to persist the tmp rename let dir = std::fs::File::open(chunk_dir_path)?; nix::unistd::fsync(dir.as_raw_fd()) @@ -967,10 +979,22 @@ impl ChunkStore { (chunk_path, counter) } - pub(super) fn try_ensure_sync_level(&self) -> Result<(), Error> { - if self.sync_level != DatastoreFSyncLevel::Filesystem { + pub(super) fn try_ensure_sync_level_with_update( + &self, + mut mutex_guard: MutexGuard, + update_sync_level: Option, + ) -> Result<(), Error> { + let assure_sync_level = mutex_guard.sync_level; + if let Some(sync_level) = update_sync_level { + mutex_guard.sync_level = sync_level; + } + + drop(mutex_guard); // never hold during syncfs + + if assure_sync_level != DatastoreFSyncLevel::Filesystem { return Ok(()); } + let file = std::fs::File::open(self.base_path())?; let fd = file.as_raw_fd(); info!("syncing filesystem"); diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index b67ab2f3a..fbb1f2494 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -595,6 +595,19 @@ impl DataStore { 
operation: Some(lookup.operation), })); } + + let tuning = pbs_config::datastore::parse_datastore_tuning_options(&config)?; + let sync_level = tuning.sync_level.unwrap_or_default(); + + let mutex_guard = datastore.chunk_store.mutex().lock().unwrap(); + if mutex_guard.sync_level != sync_level { + datastore + .chunk_store + .try_ensure_sync_level_with_update(mutex_guard, Some(sync_level))?; + } else { + drop(mutex_guard); + } + Arc::clone(&datastore.chunk_store) } else { let tuning = pbs_config::datastore::parse_datastore_tuning_options(&config)?; @@ -2995,7 +3008,10 @@ impl DataStore { /// Syncs the filesystem of the chunk store base path if 'sync_level' is set to /// [`DatastoreFSyncLevel::Filesystem`]. Uses syncfs(2). pub fn try_ensure_sync_level(&self) -> Result<(), Error> { - self.inner.chunk_store.try_ensure_sync_level() + let mutex_guard = self.inner.chunk_store.mutex().lock().unwrap(); + self.inner + .chunk_store + .try_ensure_sync_level_with_update(mutex_guard, None) } /// Destroy a datastore. This requires that there are no active operations on the datastore. diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs index ac27d4637..854cd5019 100644 --- a/pbs-datastore/src/local_datastore_lru_cache.rs +++ b/pbs-datastore/src/local_datastore_lru_cache.rs @@ -34,12 +34,11 @@ impl LocalDatastoreLruCache { /// /// Fails if the chunk cannot be inserted successfully. pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> { - let _lock = self.store.mutex().lock().unwrap(); + let lock = self.store.mutex().lock().unwrap(); + + self.store + .insert_chunk_nolock(chunk, digest, false, &lock)?; - // Safety: lock acquire above - unsafe { - self.store.insert_chunk_nolock(chunk, digest, false)?; - } self.cache.insert(*digest, (), |digest| { // Safety: lock acquired above, this is executed inline! unsafe { -- 2.47.3