From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 00DAE1FF183 for ; Wed, 19 Nov 2025 14:24:38 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 595C5C245; Wed, 19 Nov 2025 14:24:43 +0100 (CET) Date: Wed, 19 Nov 2025 14:24:35 +0100 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox Backup Server development discussion References: <20251114150544.224839-1-s.rufinatscha@proxmox.com> <20251114150544.224839-3-s.rufinatscha@proxmox.com> In-Reply-To: <20251114150544.224839-3-s.rufinatscha@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.17.0 (https://github.com/astroidmail/astroid) Message-Id: <1763557787.uko5tj5o4t.astroid@yuna.none> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1763558647349 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.105 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_1 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_3 0.1 random spam to be learned in bayes RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pbs-devel] [PATCH proxmox-backup v2 2/4] partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups X-BeenThere: pbs-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox Backup Server development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox Backup Server development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pbs-devel-bounces@lists.proxmox.com Sender: "pbs-devel" On November 14, 2025 4:05 pm, Samuel Rufinatscha wrote: > Repeated /status requests caused lookup_datastore() to re-read and > parse datastore.cfg on every call. The issue was mentioned in report > #6049 [1]. cargo-flamegraph [2] confirmed that the hot path is > dominated by pbs_config::datastore::config() (config parsing). > > This patch implements caching of the global datastore.cfg using the > generation numbers from the shared config version cache. It caches the > datastore.cfg along with the generation number and, when a subsequent > lookup sees the same generation, it reuses the cached config without > re-reading it from disk. If the generation differs > (or the cache is unavailable), it falls back to the existing slow path > with no behavioral changes. > > Behavioral notes > > - The generation is bumped via the existing save_config() path, so > API-driven config changes are detected immediately. > - Manual edits to datastore.cfg are not detected; a TTL > guard is introduced in a dedicated patch in this series. > - DataStore::drop still performs a config read on the common path, > this is covered in a dedicated patch in this series. > > Links > > [1] Bugzilla: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 > [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph > > Fixes: #6049 > Signed-off-by: Samuel Rufinatscha > --- > pbs-datastore/Cargo.toml | 1 + > pbs-datastore/src/datastore.rs | 120 +++++++++++++++++++++++---------- > 2 files changed, 87 insertions(+), 34 deletions(-) > > diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml > index 8ce930a9..42f49a7b 100644 > --- a/pbs-datastore/Cargo.toml > +++ b/pbs-datastore/Cargo.toml > @@ -40,6 +40,7 @@ proxmox-io.workspace = true > proxmox-lang.workspace=true > proxmox-s3-client = { workspace = true, features = [ "impl" ] } > proxmox-schema = { workspace = true, features = [ "api-macro" ] } > +proxmox-section-config.workspace = true > proxmox-serde = { workspace = true, features = [ "serde_json" ] } > proxmox-sys.workspace = true > proxmox-systemd.workspace = true > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index 031fa958..e7748872 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -32,7 +32,8 @@ use pbs_api_types::{ > MaintenanceType, Operation, UPID, > }; > use pbs_config::s3::S3_CFG_TYPE_ID; > -use pbs_config::BackupLockGuard; > +use pbs_config::{BackupLockGuard, ConfigVersionCache}; > +use proxmox_section_config::SectionConfigData; > > use crate::backup_info::{ > BackupDir, BackupGroup, BackupInfo, OLD_LOCKING, PROTECTED_MARKER_FILENAME, > @@ -46,6 +47,17 @@ use crate::s3::S3_CONTENT_PREFIX; > use crate::task_tracking::{self, update_active_operations}; > use crate::{DataBlob, LocalDatastoreLruCache}; > > +// Cache for fully parsed datastore.cfg > +struct DatastoreConfigCache { > + // Parsed datastore.cfg file > + config: Arc, > + // Generation number from ConfigVersionCache > + last_generation: usize, > +} > + > +static DATASTORE_CONFIG_CACHE: LazyLock>> = > + LazyLock::new(|| Mutex::new(None)); > + > static DATASTORE_MAP: LazyLock>>> = > LazyLock::new(|| Mutex::new(HashMap::new())); > > @@ -140,10 +152,12 @@ pub struct DataStoreImpl { > last_gc_status: Mutex, > verify_new: bool, > chunk_order: ChunkOrder, > - last_digest: Option<[u8; 32]>, > sync_level: DatastoreFSyncLevel, > backend_config: DatastoreBackendConfig, > lru_store_caching: Option, > + /// Datastore generation number from `ConfigVersionCache` at creation time, used to > + /// validate reuse of this cached `DataStoreImpl`. > + config_generation: Option, > } > > impl DataStoreImpl { > @@ -156,10 +170,10 @@ impl DataStoreImpl { > last_gc_status: Mutex::new(GarbageCollectionStatus::default()), > verify_new: false, > chunk_order: Default::default(), > - last_digest: None, > sync_level: Default::default(), > backend_config: Default::default(), > lru_store_caching: None, > + config_generation: None, > }) > } > } > @@ -254,6 +268,37 @@ impl DatastoreBackend { > } > } > > +/// Return the cached datastore SectionConfig and its generation. > +fn datastore_section_config_cached() -> Result<(Arc, Option), Error> { > + let gen = ConfigVersionCache::new() > + .ok() > + .map(|c| c.datastore_generation()); > + > + let mut guard = DATASTORE_CONFIG_CACHE.lock().unwrap(); > + > + // Fast path: re-use cached datastore.cfg > + if let (Some(gen), Some(cache)) = (gen, guard.as_ref()) { > + if cache.last_generation == gen { > + return Ok((cache.config.clone(), Some(gen))); > + } > + } > + > + // Slow path: re-read datastore.cfg > + let (config_raw, _digest) = pbs_config::datastore::config()?; > + let config = Arc::new(config_raw); > + > + if let Some(gen_val) = gen { > + *guard = Some(DatastoreConfigCache { > + config: config.clone(), > + last_generation: gen_val, > + }); > + } else { > + *guard = None; > + } > + > + Ok((config, gen)) I think this would be more readable (especially with the extensions coming later, and with what I propose in my reply to patch #4) if ordered like this: let mut guard = DATASTORE_CONFIG_CACHE.lock().unwrap(); if let Ok(version_cache) = ConfigVersionCache::new() { let gen = version_cache.datastore_generation(); if let Some(cached) = guard.as_ref() { // Fast path: re-use cached datastore.cfg if gen == cached.last_generation { return Ok((cached.config.clone(), Some(gen))); } } // Slow path: re-read datastore.cfg let (config_raw, _digest) = pbs_config::datastore::config()?; let config = Arc::new(config_raw); *guard = Some(DatastoreConfigCache { config: config.clone(), last_generation: gen, }); Ok((config, Some(gen))) } else { // Fallback path, no config version cache: read datastore.cfg *guard = None; let (config_raw, _digest) = pbs_config::datastore::config()?; Ok((Arc::new(config_raw), None)) } with the later changes it would then look like this (but this still has the issues I mentioned in my comment to patch #4 ;)): let mut guard = DATASTORE_CONFIG_CACHE.lock().unwrap(); if let Some(version_cache) = ConfigVersionCache::new().ok() { let now = epoch_i64(); let current_gen = version_cache.datastore_generation(); if let Some(cached) = guard.as_ref() { // Fast path: re-use cached datastore.cfg if cache is available, generation matches and TTL not expired if cached.last_generation == current_gen && now - cached.last_update < DATASTORE_CONFIG_CACHE_TTL_SECS { return Ok((cached.config.clone(), Some(cached.last_generation))); } } // Slow path: re-read datastore.cfg let (config_raw, _digest) = pbs_config::datastore::config()?; let config = Arc::new(config_raw); // Bump datastore generation whenever we reload the config. // This ensures that Drop handlers will detect that a newer config exists // and will not rely on a stale cached entry for maintenance mandate. let prev_gen = version_cache.increase_datastore_generation(); let new_gen = prev_gen + 1; // Update cache *guard = Some(DatastoreConfigCache { config: config.clone(), last_generation: new_gen, last_update: now, }); Ok((config, Some(new_gen))) } else { // Fallback path, no config version cache: read datastore.cfg *guard = None; let (config_raw, _digest) = pbs_config::datastore::config()?; Ok((Arc::new(config_raw), None)) } technically setting the guard to None in the else branch is not needed, since if we ever get an Ok result back it has been initialized and subsequent calls cannot fail.. > +} > + > impl DataStore { > // This one just panics on everything > #[doc(hidden)] > @@ -325,56 +370,63 @@ impl DataStore { > name: &str, > operation: Option, > ) -> Result, Error> { > - // Avoid TOCTOU between checking maintenance mode and updating active operation counter, as > - // we use it to decide whether it is okay to delete the datastore. > + // Avoid TOCTOU between checking maintenance mode and updating active operations. > let _config_lock = pbs_config::datastore::lock_config()?; > > - // we could use the ConfigVersionCache's generation for staleness detection, but we load > - // the config anyway -> just use digest, additional benefit: manual changes get detected > - let (config, digest) = pbs_config::datastore::config()?; > - let config: DataStoreConfig = config.lookup("datastore", name)?; > + // Get the current datastore.cfg generation number and cached config > + let (section_config, gen_num) = datastore_section_config_cached()?; > + > + let datastore_cfg: DataStoreConfig = section_config.lookup("datastore", name)?; > + let maintenance_mode = datastore_cfg.get_maintenance_mode(); > + let mount_status = get_datastore_mount_status(&datastore_cfg); > > - if let Some(maintenance_mode) = config.get_maintenance_mode() { > - if let Err(error) = maintenance_mode.check(operation) { > + if let Some(mm) = &maintenance_mode { > + if let Err(error) = mm.check(operation.clone()) { > bail!("datastore '{name}' is unavailable: {error}"); > } > } > > - if get_datastore_mount_status(&config) == Some(false) { > - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); > - datastore_cache.remove(&config.name); > - bail!("datastore '{}' is not mounted", config.name); > + let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); > + > + if mount_status == Some(false) { > + datastore_cache.remove(&datastore_cfg.name); > + bail!("datastore '{}' is not mounted", datastore_cfg.name); > } > > - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); > - let entry = datastore_cache.get(name); > - > - // reuse chunk store so that we keep using the same process locker instance! > - let chunk_store = if let Some(datastore) = &entry { > - let last_digest = datastore.last_digest.as_ref(); > - if let Some(true) = last_digest.map(|last_digest| last_digest == &digest) { > - if let Some(operation) = operation { > - update_active_operations(name, operation, 1)?; > + // Re-use DataStoreImpl > + if let Some(existing) = datastore_cache.get(name).cloned() { > + if let (Some(last_generation), Some(gen_num)) = (existing.config_generation, gen_num) { > + if last_generation == gen_num { > + if let Some(op) = operation { > + update_active_operations(name, op, 1)?; > + } > + > + return Ok(Arc::new(Self { > + inner: existing, > + operation, > + })); > } > - return Ok(Arc::new(Self { > - inner: Arc::clone(datastore), > - operation, > - })); > } > - Arc::clone(&datastore.chunk_store) > + } > + > + // (Re)build DataStoreImpl > + > + // Reuse chunk store so that we keep using the same process locker instance! > + let chunk_store = if let Some(existing) = datastore_cache.get(name) { > + Arc::clone(&existing.chunk_store) > } else { > let tuning: DatastoreTuning = serde_json::from_value( > DatastoreTuning::API_SCHEMA > - .parse_property_string(config.tuning.as_deref().unwrap_or(""))?, > + .parse_property_string(datastore_cfg.tuning.as_deref().unwrap_or(""))?, > )?; > Arc::new(ChunkStore::open( > name, > - config.absolute_path(), > + datastore_cfg.absolute_path(), > tuning.sync_level.unwrap_or_default(), > )?) > }; > > - let datastore = DataStore::with_store_and_config(chunk_store, config, Some(digest))?; > + let datastore = DataStore::with_store_and_config(chunk_store, datastore_cfg, gen_num)?; > > let datastore = Arc::new(datastore); > datastore_cache.insert(name.to_string(), datastore.clone()); > @@ -476,7 +528,7 @@ impl DataStore { > fn with_store_and_config( > chunk_store: Arc, > config: DataStoreConfig, > - last_digest: Option<[u8; 32]>, > + generation: Option, > ) -> Result { > let mut gc_status_path = chunk_store.base_path(); > gc_status_path.push(".gc-status"); > @@ -536,10 +588,10 @@ impl DataStore { > last_gc_status: Mutex::new(gc_status), > verify_new: config.verify_new.unwrap_or(false), > chunk_order: tuning.chunk_order.unwrap_or_default(), > - last_digest, > sync_level: tuning.sync_level.unwrap_or_default(), > backend_config, > lru_store_caching, > + config_generation: generation, > }) > } > > -- > 2.47.3 > > > > _______________________________________________ > pbs-devel mailing list > pbs-devel@lists.proxmox.com > https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel > > > _______________________________________________ pbs-devel mailing list pbs-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel