From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 458631FF13B for ; Wed, 20 May 2026 12:55:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E7D191814; Wed, 20 May 2026 12:55:34 +0200 (CEST) From: Lukas Wagner To: pdm-devel@lists.proxmox.com Subject: [PATCH datacenter-manager] api: resources: use new api-cache for remote resources Date: Wed, 20 May 2026 12:54:53 +0200 Message-ID: <20260520105453.209919-1-l.wagner@proxmox.com> X-Mailer: git-send-email 2.47.3 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1779274483822 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.053 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [resources.rs] Message-ID-Hash: WCKWZO6ZLZTQAO3DL4BQFQDJHRAHFGGW X-Message-ID-Hash: WCKWZO6ZLZTQAO3DL4BQFQDJHRAHFGGW X-MailFrom: l.wagner@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Datacenter Manager development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Similar to the subscription cache that was already migrated over, use the new api-cache for remote resources as well. The 'top-entities' API already used the existing resource cache. Since that API uses blocking IO when reading from the RRD anyways, a new, blocking getter was added ('get_cached_resources_blocking') which uses the blocking code paths of namespaced_cache. The 'top-entities' API handler was made blocking as well, since there are no async code paths in the handler anyway. As a result, any blocking IO when accessing the RRD cannot block the async runtime anymore. Signed-off-by: Lukas Wagner --- server/src/api/resources.rs | 113 ++++++++++++------- server/src/metric_collection/top_entities.rs | 4 +- 2 files changed, 72 insertions(+), 45 deletions(-) diff --git a/server/src/api/resources.rs b/server/src/api/resources.rs index 8bcb8ce3..1a6a23d9 100644 --- a/server/src/api/resources.rs +++ b/server/src/api/resources.rs @@ -1,6 +1,5 @@ use std::collections::{HashMap, HashSet}; use std::str::FromStr; -use std::sync::{LazyLock, RwLock}; use anyhow::{bail, Context, Error}; use futures::future::join_all; @@ -748,7 +747,7 @@ pub async fn get_subscription_status( returns: { type: TopEntities } )] /// Returns the top X entities regarding the chosen type -async fn get_top_entities( +fn get_top_entities( timeframe: Option, view: Option, rpcenv: &mut dyn RpcEnvironment, @@ -959,72 +958,100 @@ async fn fetch_remote_subscription_info( Ok(list) } -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] +/// Per-remote entry in the resource cache. pub struct CachedResources { + /// Resources for this remote. pub resources: Vec, + /// Unix timestamp at which the list of resources was polled. pub timestamp: i64, } -static CACHE: LazyLock>> = - LazyLock::new(|| RwLock::new(HashMap::new())); - /// Get resources for a given remote. /// /// If recent enough cached data is available, it is returned /// instead of calling out to the remote. async fn get_resources_for_remote(remote: &Remote, max_age: u64) -> Result, Error> { - let remote_name = remote.id.to_owned(); - if let Some(cached_resource) = get_cached_resources(&remote_name, max_age) { - Ok(cached_resource.resources) + if let Some(cached_resources) = get_cached_resources(&remote.id, max_age).await? { + Ok(cached_resources.resources) } else { let resources = fetch_remote_resource(remote).await?; let now = proxmox_time::epoch_i64(); - update_cached_resources(&remote_name, &resources, now); + + if let Some(existing_state) = + update_cached_resources(&remote.id, resources.clone(), now).await? + { + // Somebody else updated the cache while we performed the API request, + // return the more recent data instead of the data we just fetched. + return Ok(existing_state.resources); + } Ok(resources) } } +const REMOTE_RESOURCES_CACHE_KEY: &str = "resources"; + /// Read cached resource data from the cache -pub fn get_cached_resources(remote: &str, max_age: u64) -> Option { - // there is no good way to recover from this, so panicking should be fine - let cache = CACHE.read().expect("mutex poisoned"); +async fn get_cached_resources( + remote: &str, + max_age: u64, +) -> Result, Error> { + let cache = api_cache::read_remote(remote).await?; + let resources = cache + .get_with_max_age(REMOTE_RESOURCES_CACHE_KEY, max_age as i64) + .await + .inspect_err(|err| { + log::error!( + "could not read remote resources for remote '{remote}' from API cache: {err}" + ) + }) + .ok() + .flatten(); - if let Some(cached_resource) = cache.get(remote) { - let now = proxmox_time::epoch_i64(); - let diff = now - cached_resource.timestamp; + Ok(resources) +} - if diff > max_age as i64 || diff < 0 { - // value is too old or from the future - None - } else { - Some(cached_resource.clone()) - } - } else { - None - } +/// Read cached resource data from the cache (blocking). +pub fn get_cached_resources_blocking( + remote: &str, + max_age: u64, +) -> Result, Error> { + let cache = api_cache::read_remote_blocking(remote)?; + let resources = cache + .get_with_max_age(REMOTE_RESOURCES_CACHE_KEY, max_age as i64) + .inspect_err(|err| { + log::error!( + "could not read remote resources for remote '{remote}' from API cache: {err}" + ) + }) + .ok() + .flatten(); + + Ok(resources) } /// Update cached resource data. /// -/// If the cache already contains more recent data we don't insert the passed resources. -fn update_cached_resources(remote: &str, resources: &[Resource], now: i64) { - // there is no good way to recover from this, so panicking should be fine - let mut cache = CACHE.write().expect("mutex poisoned"); +/// If the cache already contains more recent data, this function returns the already +/// stored state as `Ok(Some(state))`. If the data that was passed in replaced the cache +/// entry, `Ok(None)` is returned. +async fn update_cached_resources( + remote: &str, + resources: Vec, + now: i64, +) -> Result, Error> { + let cache = api_cache::write_remote(remote).await?; - if let Some(cached_resource) = cache.get(remote) { - // skip updating if existing data is newer - if cached_resource.timestamp >= now { - return; - } - } - - cache.insert( - remote.into(), - CachedResources { - timestamp: now, - resources: resources.into(), - }, - ); + Ok(cache + .set_if_newer_with_timestamp( + REMOTE_RESOURCES_CACHE_KEY, + CachedResources { + resources, + timestamp: now, + }, + now, + ) + .await?) } /// Fetch remote resources and map to pdm-native data types. diff --git a/server/src/metric_collection/top_entities.rs b/server/src/metric_collection/top_entities.rs index e10c672a..61ea1d9f 100644 --- a/server/src/metric_collection/top_entities.rs +++ b/server/src/metric_collection/top_entities.rs @@ -48,8 +48,8 @@ pub fn calculate_top( continue; } - if let Some(data) = - crate::api::resources::get_cached_resources(remote_name, i64::MAX as u64) + if let Ok(Some(data)) = + crate::api::resources::get_cached_resources_blocking(remote_name, i64::MAX as u64) { for res in data.resources { if !is_resource_included(remote_name, &res) { -- 2.47.3