public inbox for pdm-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [PATCH datacenter-manager] api: resources: use new api-cache for remote resources
@ 2026-05-20 10:54 Lukas Wagner
  2026-05-21 13:00 ` applied: " Wolfgang Bumiller
  0 siblings, 1 reply; 2+ messages in thread
From: Lukas Wagner @ 2026-05-20 10:54 UTC (permalink / raw)
  To: pdm-devel

Similar to the subscription cache that was already migrated over, use
the new api-cache for remote resources as well.

The 'top-entities' API already used the existing resource cache. Since
that API uses blocking IO when reading from the RRD anyways, a new,
blocking getter was added ('get_cached_resources_blocking') which uses
the blocking code paths of namespaced_cache. The 'top-entities' API
handler was made blocking as well, since there are no async code paths
in the handler anyway. As a result, any blocking IO when accessing the
RRD cannot block the async runtime anymore.

Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
---
 server/src/api/resources.rs                  | 113 ++++++++++++-------
 server/src/metric_collection/top_entities.rs |   4 +-
 2 files changed, 72 insertions(+), 45 deletions(-)

diff --git a/server/src/api/resources.rs b/server/src/api/resources.rs
index 8bcb8ce3..1a6a23d9 100644
--- a/server/src/api/resources.rs
+++ b/server/src/api/resources.rs
@@ -1,6 +1,5 @@
 use std::collections::{HashMap, HashSet};
 use std::str::FromStr;
-use std::sync::{LazyLock, RwLock};
 
 use anyhow::{bail, Context, Error};
 use futures::future::join_all;
@@ -748,7 +747,7 @@ pub async fn get_subscription_status(
     returns: { type: TopEntities }
 )]
 /// Returns the top X entities regarding the chosen type
-async fn get_top_entities(
+fn get_top_entities(
     timeframe: Option<RrdTimeframe>,
     view: Option<String>,
     rpcenv: &mut dyn RpcEnvironment,
@@ -959,72 +958,100 @@ async fn fetch_remote_subscription_info(
     Ok(list)
 }
 
-#[derive(Clone)]
+#[derive(Clone, Serialize, Deserialize)]
+/// Per-remote entry in the resource cache.
 pub struct CachedResources {
+    /// Resources for this remote.
     pub resources: Vec<Resource>,
+    /// Unix timestamp at which the list of resources was polled.
     pub timestamp: i64,
 }
 
-static CACHE: LazyLock<RwLock<HashMap<String, CachedResources>>> =
-    LazyLock::new(|| RwLock::new(HashMap::new()));
-
 /// Get resources for a given remote.
 ///
 /// If recent enough cached data is available, it is returned
 /// instead of calling out to the remote.
 async fn get_resources_for_remote(remote: &Remote, max_age: u64) -> Result<Vec<Resource>, Error> {
-    let remote_name = remote.id.to_owned();
-    if let Some(cached_resource) = get_cached_resources(&remote_name, max_age) {
-        Ok(cached_resource.resources)
+    if let Some(cached_resources) = get_cached_resources(&remote.id, max_age).await? {
+        Ok(cached_resources.resources)
     } else {
         let resources = fetch_remote_resource(remote).await?;
         let now = proxmox_time::epoch_i64();
-        update_cached_resources(&remote_name, &resources, now);
+
+        if let Some(existing_state) =
+            update_cached_resources(&remote.id, resources.clone(), now).await?
+        {
+            // Somebody else updated the cache while we performed the API request,
+            // return the more recent data instead of the data we just fetched.
+            return Ok(existing_state.resources);
+        }
         Ok(resources)
     }
 }
 
+const REMOTE_RESOURCES_CACHE_KEY: &str = "resources";
+
 /// Read cached resource data from the cache
-pub fn get_cached_resources(remote: &str, max_age: u64) -> Option<CachedResources> {
-    // there is no good way to recover from this, so panicking should be fine
-    let cache = CACHE.read().expect("mutex poisoned");
+async fn get_cached_resources(
+    remote: &str,
+    max_age: u64,
+) -> Result<Option<CachedResources>, Error> {
+    let cache = api_cache::read_remote(remote).await?;
+    let resources = cache
+        .get_with_max_age(REMOTE_RESOURCES_CACHE_KEY, max_age as i64)
+        .await
+        .inspect_err(|err| {
+            log::error!(
+                "could not read remote resources for remote '{remote}' from API cache: {err}"
+            )
+        })
+        .ok()
+        .flatten();
 
-    if let Some(cached_resource) = cache.get(remote) {
-        let now = proxmox_time::epoch_i64();
-        let diff = now - cached_resource.timestamp;
+    Ok(resources)
+}
 
-        if diff > max_age as i64 || diff < 0 {
-            // value is too old or from the future
-            None
-        } else {
-            Some(cached_resource.clone())
-        }
-    } else {
-        None
-    }
+/// Read cached resource data from the cache (blocking).
+pub fn get_cached_resources_blocking(
+    remote: &str,
+    max_age: u64,
+) -> Result<Option<CachedResources>, Error> {
+    let cache = api_cache::read_remote_blocking(remote)?;
+    let resources = cache
+        .get_with_max_age(REMOTE_RESOURCES_CACHE_KEY, max_age as i64)
+        .inspect_err(|err| {
+            log::error!(
+                "could not read remote resources for remote '{remote}' from API cache: {err}"
+            )
+        })
+        .ok()
+        .flatten();
+
+    Ok(resources)
 }
 
 /// Update cached resource data.
 ///
-/// If the cache already contains more recent data we don't insert the passed resources.
-fn update_cached_resources(remote: &str, resources: &[Resource], now: i64) {
-    // there is no good way to recover from this, so panicking should be fine
-    let mut cache = CACHE.write().expect("mutex poisoned");
+/// If the cache already contains more recent data, this function returns the already
+/// stored state as `Ok(Some(state))`. If the data that was passed in replaced the cache
+/// entry, `Ok(None)` is returned.
+async fn update_cached_resources(
+    remote: &str,
+    resources: Vec<Resource>,
+    now: i64,
+) -> Result<Option<CachedResources>, Error> {
+    let cache = api_cache::write_remote(remote).await?;
 
-    if let Some(cached_resource) = cache.get(remote) {
-        // skip updating if existing data is newer
-        if cached_resource.timestamp >= now {
-            return;
-        }
-    }
-
-    cache.insert(
-        remote.into(),
-        CachedResources {
-            timestamp: now,
-            resources: resources.into(),
-        },
-    );
+    Ok(cache
+        .set_if_newer_with_timestamp(
+            REMOTE_RESOURCES_CACHE_KEY,
+            CachedResources {
+                resources,
+                timestamp: now,
+            },
+            now,
+        )
+        .await?)
 }
 
 /// Fetch remote resources and map to pdm-native data types.
diff --git a/server/src/metric_collection/top_entities.rs b/server/src/metric_collection/top_entities.rs
index e10c672a..61ea1d9f 100644
--- a/server/src/metric_collection/top_entities.rs
+++ b/server/src/metric_collection/top_entities.rs
@@ -48,8 +48,8 @@ pub fn calculate_top(
             continue;
         }
 
-        if let Some(data) =
-            crate::api::resources::get_cached_resources(remote_name, i64::MAX as u64)
+        if let Ok(Some(data)) =
+            crate::api::resources::get_cached_resources_blocking(remote_name, i64::MAX as u64)
         {
             for res in data.resources {
                 if !is_resource_included(remote_name, &res) {
-- 
2.47.3





^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-21 13:00 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-20 10:54 [PATCH datacenter-manager] api: resources: use new api-cache for remote resources Lukas Wagner
2026-05-21 13:00 ` applied: " Wolfgang Bumiller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal