all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Shannon Sterz <s.sterz@proxmox.com>
To: pdm-devel@lists.proxmox.com
Subject: [PATCH datacenter-manager 17/17] server: bin: api: tasks: add task to discover new staged certificates
Date: Thu, 11 Jun 2026 14:03:27 +0200	[thread overview]
Message-ID: <20260611120327.257523-18-s.sterz@proxmox.com> (raw)
In-Reply-To: <20260611120327.257523-1-s.sterz@proxmox.com>

so pdm stays in the loop if a remote's certificate changes.

Signed-off-by: Shannon Sterz <s.sterz@proxmox.com>
---
 server/src/bin/proxmox-datacenter-api/main.rs |   1 +
 .../bin/proxmox-datacenter-api/tasks/mod.rs   |   1 +
 .../tasks/remote_staged_fingerprints.rs       | 149 ++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs

diff --git a/server/src/bin/proxmox-datacenter-api/main.rs b/server/src/bin/proxmox-datacenter-api/main.rs
index 6e915b67..43c40651 100644
--- a/server/src/bin/proxmox-datacenter-api/main.rs
+++ b/server/src/bin/proxmox-datacenter-api/main.rs
@@ -335,6 +335,7 @@ async fn run(debug: bool) -> Result<(), Error> {
     tasks::remote_tasks::start_task()?;
     tasks::remote_updates::start_task()?;
     tasks::ceph_detection::start_task();
+    tasks::remote_staged_fingerprints::start_task();
 
     server.await?;
     log::info!("server shutting down, waiting for active workers to complete");
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
index 2a05e9b3..968a6dc2 100644
--- a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
+++ b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
@@ -2,6 +2,7 @@ pub mod logrotate;
 
 pub mod ceph_detection;
 pub mod remote_node_mapping;
+pub mod remote_staged_fingerprints;
 pub mod remote_tasks;
 pub mod remote_updates;
 
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs
new file mode 100644
index 00000000..9299b657
--- /dev/null
+++ b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs
@@ -0,0 +1,149 @@
+//! Periodically query remotes for staged fingerprints.
+//!
+//! Nodes of remotes that have a specified fingerprint are probed regularly to see if a new staged
+//! certificate was issued. This should allow us to keep TLS connections secure without manual user
+//! intervention even across certificate rotation.
+
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+
+use anyhow::Error;
+
+use proxmox_schema::PropertyString;
+
+use pdm_api_types::Fingerprint;
+use pdm_api_types::remotes::{NodeUrl, Remote, RemoteType};
+
+use server::connection::make_pve_client_with_endpoint;
+use server::task_utils;
+
+// The update task renewing certificates on a remote runs once a day by default, so scan the
+// remotes twice a day to pick up on a new certificate as early as possible.
+const TASK_INTERVAL: u64 = 12 * 60 * 60;
+
+/// Start the background task that periodically queries remotes for staged fingerprints.
+///
+/// The future returned by `run()` is handed to `spawn_aborted_on_shutdown()` of the parent
+/// module; nothing is returned to the caller.
+pub fn start_task() {
+    super::spawn_aborted_on_shutdown(run());
+}
+
+/// Main loop of the task: run one query round, sleep until the next scheduled instant, repeat.
+async fn run() {
+    loop {
+        run_once().await;
+
+        // Sleep until the instant reported by `next_aligned_instant()` for our interval instead
+        // of for a fixed duration after this round finished.
+        let wake_at = task_utils::next_aligned_instant(TASK_INTERVAL);
+        tokio::time::sleep_until(wake_at.into()).await;
+    }
+}
+
+#[tracing::instrument(skip_all, name = "update_staged_fingerprints")]
+async fn run_once() {
+    if let Err(err) = query_staged_fps().await {
+        log::warn!("Could not query remotes for new staged fingerprints: {err:#}");
+    }
+}
+
+/// Query PVE remote nodes for staged certificate fingerprints and store any changes.
+///
+/// Works in two phases:
+///
+/// 1. Without holding the configuration lock, every node of a PVE remote that has a fingerprint
+///    configured is queried. Nodes whose staged fingerprints differ from the configured ones are
+///    collected as pending updates. A node that cannot be queried is logged and skipped.
+/// 2. Only if there are pending updates, the remotes configuration is locked and re-read. An
+///    update is applied only if a remote with the same name still has a node with the same
+///    hostname and fingerprint. The configuration is saved only if at least one node was
+///    actually updated, so an uneventful round never rewrites the configuration.
+///
+/// # Errors
+///
+/// Returns an error if the remotes configuration cannot be read, locked or saved.
+async fn query_staged_fps() -> Result<(), Error> {
+    // First check if and what we need to update.
+    let (config, _) = pdm_config::remotes::config()?;
+    let mut updates: HashMap<String, Vec<PropertyString<NodeUrl>>> = HashMap::new();
+
+    for (remote_name, remote) in config {
+        match remote.ty {
+            RemoteType::Pve => {
+                for node in &remote.nodes {
+                    let Some(fp) = node.fingerprint.as_ref() else {
+                        continue; // Node uses the system's trust store, skip querying staged certs.
+                    };
+
+                    let new_fps = match fetch_staged_fps_from_pve_remote(&remote, node, fp).await {
+                        Ok(res) => res,
+                        Err(e) => {
+                            log::warn!(
+                                "Could not get staged fingerprints of node '{}' of remote \
+                                    '{remote_name}': {e:#}",
+                                node.hostname
+                            );
+                            continue;
+                        }
+                    };
+
+                    if node.staged_fingerprints != new_fps {
+                        let new_node = NodeUrl {
+                            hostname: node.hostname.to_owned(),
+                            fingerprint: Some(fp.to_owned()),
+                            staged_fingerprints: new_fps,
+                        };
+
+                        match updates.entry(remote_name.clone()) {
+                            Entry::Occupied(mut e) => e.get_mut().push(new_node.into()),
+                            Entry::Vacant(e) => {
+                                e.insert(vec![new_node.into()]);
+                            }
+                        }
+                    }
+                }
+            }
+            RemoteType::Pbs => {
+                log::debug!("PBS remotes don't rotate certificates, skipping '{remote_name}'.");
+            }
+        }
+    }
+
+    // No node reported a change: neither take the lock nor rewrite the configuration.
+    if updates.is_empty() {
+        return Ok(());
+    }
+
+    // Only then lock the configuration, re-check if the remote node we want to update still
+    // matches the expected state and then update it.
+    let _lock = pdm_config::remotes::lock_config()?;
+    let (mut new_conf, _) = pdm_config::remotes::config()?;
+    let mut changed = false;
+
+    for (remote_name, mut nodes) in updates {
+        let Some(remote) = new_conf.get_mut(&remote_name) else {
+            continue; // The remote no longer exists in the re-read configuration.
+        };
+
+        for node in &mut nodes {
+            for n in remote.nodes.iter_mut() {
+                if node.hostname != n.hostname || node.fingerprint != n.fingerprint {
+                    continue;
+                }
+
+                // At this point we know that a) a remote by the same name with b) a node
+                // with the same hostname and fingerprint still exists -> update its staged
+                // fingerprints.
+                log::info!(
+                    "Got new staged fingerprints for remote {} and node {}, updating...",
+                    remote.id,
+                    n.hostname
+                );
+                n.staged_fingerprints = node.staged_fingerprints.take();
+                changed = true;
+            }
+        }
+    }
+
+    // All pending updates became stale while we were querying, so there is nothing to persist.
+    if !changed {
+        return Ok(());
+    }
+
+    pdm_config::remotes::save_config(new_conf)
+}
+
+/// Fetch the fingerprints that should be staged for `node` of the PVE remote `remote`.
+///
+/// Requests the certificate info for `localhost` from a client that uses the node's hostname as
+/// its endpoint. Every certificate that carries a fingerprint is staged, except the one whose
+/// lower-cased fingerprint equals the lower-cased `current_fp` and the one with the filename
+/// `pve-root-ca.pem`.
+///
+/// Returns `Ok(None)` if no fingerprint is left to stage.
+///
+/// # Errors
+///
+/// Fails if the client cannot be created, the request fails or a fingerprint cannot be parsed.
+async fn fetch_staged_fps_from_pve_remote(
+    remote: &Remote,
+    node: &NodeUrl,
+    current_fp: &str,
+) -> Result<Option<Vec<Fingerprint>>, Error> {
+    // Query each node by connecting to it directly instead of specifying the node
+    // name in the request. The hostname in the remotes config may not match the
+    // node name, this avoids such a mismatch issue.
+    let client = make_pve_client_with_endpoint(remote, Some(&node.hostname))?;
+    let mut certificates = client.certificates_info("localhost").await?;
+    let current_fp = current_fp.to_lowercase();
+    let mut staged = Vec::new();
+
+    for cert in certificates.iter_mut() {
+        let Some(fp) = cert.fingerprint.take() else {
+            continue;
+        };
+
+        // Stage all but the current or ca certificates.
+        if cert.filename == "pve-root-ca.pem" || fp.to_lowercase() == current_fp {
+            continue;
+        }
+
+        staged.push(fp.parse::<Fingerprint>()?);
+    }
+
+    Ok((!staged.is_empty()).then_some(staged))
+}
-- 
2.47.3





      parent reply	other threads:[~2026-06-11 12:03 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11 12:03 [RFC cluster/datacenter-manager/manager/proxmox 00/17] TLS Certificate Staging Shannon Sterz
2026-06-11 12:03 ` [PATCH cluster 01/17] setup: allow caller to provide the certificate filename Shannon Sterz
2026-06-11 12:03 ` [PATCH manager 02/17] bin/api: add a new staged certificate when renewing self-signed cert Shannon Sterz
2026-06-11 12:03 ` [PATCH manager 03/17] api: certificates: if node parameter is 'localhost' return local certs Shannon Sterz
2026-06-11 12:03 ` [PATCH proxmox 04/17] client: ignore certificate trust store validation result on fp option Shannon Sterz
2026-06-11 12:03 ` [PATCH proxmox 05/17] pve-api-types: expose certificates info endpoint Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 06/17] client: don't short-circuit on valid certificate when tls fp exists Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 07/17] client: allow users to update a changed fingerprint interactively Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 08/17] cli/api-types: move Fingerprint to common api type crate Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 09/17] server: connection: report mismatching fingerprint as untrusted on probe Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 10/17] ui: wizzard: add context if a provided fingerprint did not match remote Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 11/17] ui: wizzard: nodes page: always update fingerprints on user confirmation Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 12/17] pdm-api-types: implement ApiType for Fingerprint Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 13/17] pdm-api-types: add staged_fingerprints field to NodeUrl Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 14/17] server: remotes: lock remotes config when updating it Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 15/17] server: connection: rotate in staged fingerprints when encountering them Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 16/17] server: api: tasks: move `spawn_aborted_on_shutdown()` to super module Shannon Sterz
2026-06-11 12:03 ` Shannon Sterz [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260611120327.257523-18-s.sterz@proxmox.com \
    --to=s.sterz@proxmox.com \
    --cc=pdm-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal