public inbox for pdm-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Shannon Sterz <s.sterz@proxmox.com>
To: pdm-devel@lists.proxmox.com
Subject: [PATCH datacenter-manager 17/17] server: bin: api: tasks: add task to discover new staged certificates
Date: Thu, 11 Jun 2026 14:03:27 +0200	[thread overview]
Message-ID: <20260611120327.257523-18-s.sterz@proxmox.com> (raw)
In-Reply-To: <20260611120327.257523-1-s.sterz@proxmox.com>

so pdm stays in the loop if a remote's certificate changes.

Signed-off-by: Shannon Sterz <s.sterz@proxmox.com>
---
 server/src/bin/proxmox-datacenter-api/main.rs |   1 +
 .../bin/proxmox-datacenter-api/tasks/mod.rs   |   1 +
 .../tasks/remote_staged_fingerprints.rs       | 149 ++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs

diff --git a/server/src/bin/proxmox-datacenter-api/main.rs b/server/src/bin/proxmox-datacenter-api/main.rs
index 6e915b67..43c40651 100644
--- a/server/src/bin/proxmox-datacenter-api/main.rs
+++ b/server/src/bin/proxmox-datacenter-api/main.rs
@@ -335,6 +335,7 @@ async fn run(debug: bool) -> Result<(), Error> {
     tasks::remote_tasks::start_task()?;
     tasks::remote_updates::start_task()?;
     tasks::ceph_detection::start_task();
+    tasks::remote_staged_fingerprints::start_task();
 
     server.await?;
     log::info!("server shutting down, waiting for active workers to complete");
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
index 2a05e9b3..968a6dc2 100644
--- a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
+++ b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
@@ -2,6 +2,7 @@ pub mod logrotate;
 
 pub mod ceph_detection;
 pub mod remote_node_mapping;
+pub mod remote_staged_fingerprints;
 pub mod remote_tasks;
 pub mod remote_updates;
 
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs
new file mode 100644
index 00000000..9299b657
--- /dev/null
+++ b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs
@@ -0,0 +1,149 @@
+//! Periodically query remotes for staged fingerprints.
+//!
+//! Nodes of remotes that have a specified fingerprint are probed regularly to see if a new staged
+//! certificate was issued. This should allow us to keep TLS connections secure without manual user
+//! intervention even across certificate rotation.
+
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+
+use anyhow::Error;
+
+use proxmox_schema::PropertyString;
+
+use pdm_api_types::Fingerprint;
+use pdm_api_types::remotes::{NodeUrl, Remote, RemoteType};
+
+use server::connection::make_pve_client_with_endpoint;
+use server::task_utils;
+
+// The daily update task renewing certificates on a remote runs once a day by default, so scan
+// the remotes twice a day to pick up on a new certificate as early as possible.
+//
+// The value is 12 hours expressed in seconds.
+const TASK_INTERVAL: u64 = 12 * 60 * 60;
+
+/// Start the background task that periodically queries remotes for staged fingerprints.
+///
+/// The polling future is handed to the parent module's `spawn_aborted_on_shutdown`.
+pub fn start_task() {
+    let polling_loop = run();
+    super::spawn_aborted_on_shutdown(polling_loop);
+}
+
+/// Endless polling loop: run one query pass, then sleep until the next instant aligned to
+/// `TASK_INTERVAL`.
+async fn run() {
+    loop {
+        run_once().await;
+
+        let wake_up_at =
+            tokio::time::Instant::from_std(task_utils::next_aligned_instant(TASK_INTERVAL));
+        tokio::time::sleep_until(wake_up_at).await;
+    }
+}
+
+/// Run a single query pass. A failure is only logged as a warning and never propagated, so the
+/// caller's loop keeps running.
+#[tracing::instrument(skip_all, name = "update_staged_fingerprints")]
+async fn run_once() {
+    match query_staged_fps().await {
+        Ok(()) => {}
+        Err(err) => {
+            log::warn!("Could not query remotes for new staged fingerprints: {err:#}");
+        }
+    }
+}
+
+/// Query all PVE remotes for staged fingerprints and persist any changes.
+///
+/// Works in two phases:
+///
+/// 1. Without holding the config lock, every node of every PVE remote that has a pinned
+///    fingerprint is asked for its certificates. Nodes whose staged fingerprints differ from
+///    the configured ones are collected as pending updates. Nodes that cannot be queried are
+///    logged and skipped.
+/// 2. If there is anything to update, the remotes config is locked and re-read. A pending
+///    update is only applied if a remote with the same name and a node with the same hostname
+///    and fingerprint still exist. The config is only written back if at least one node was
+///    actually modified.
+///
+/// # Errors
+///
+/// Returns an error if the remotes config cannot be read, locked or saved.
+async fn query_staged_fps() -> Result<(), Error> {
+    // First check if and what we need to update.
+    let (config, _) = pdm_config::remotes::config()?;
+    let mut updates: HashMap<String, Vec<PropertyString<NodeUrl>>> = HashMap::new();
+
+    for (remote_name, remote) in config {
+        match remote.ty {
+            RemoteType::Pve => {
+                for node in &remote.nodes {
+                    let Some(fp) = node.fingerprint.as_ref() else {
+                        continue; // Node uses the system's trust store, skip querying staged certs.
+                    };
+
+                    let new_fps = match fetch_staged_fps_from_pve_remote(&remote, node, fp).await {
+                        Ok(res) => res,
+                        Err(e) => {
+                            log::warn!(
+                                "Could not get staged fingerprints of node '{}' of remote \
+                                    '{remote_name}': {e:#}",
+                                node.hostname
+                            );
+                            continue;
+                        }
+                    };
+
+                    if node.staged_fingerprints != new_fps {
+                        let new_node = NodeUrl {
+                            hostname: node.hostname.to_owned(),
+                            fingerprint: Some(fp.to_owned()),
+                            staged_fingerprints: new_fps,
+                        };
+
+                        match updates.entry(remote_name.clone()) {
+                            Entry::Occupied(mut e) => e.get_mut().push(new_node.into()),
+                            Entry::Vacant(e) => {
+                                e.insert(vec![new_node.into()]);
+                            }
+                        }
+                    }
+                }
+            }
+            RemoteType::Pbs => {
+                log::debug!("PBS remotes don't rotate certificates, skipping '{remote_name}'.");
+            }
+        }
+    }
+
+    // Nothing changed on any remote: do not take the config lock or rewrite the config.
+    if updates.is_empty() {
+        return Ok(());
+    }
+
+    // Only then lock the configuration, re-check if the remote node we want to update still
+    // matches the expected state and then update it.
+    let _lock = pdm_config::remotes::lock_config()?;
+    let (mut new_conf, _) = pdm_config::remotes::config()?;
+    let mut changed = false;
+
+    for (remote_name, mut nodes) in updates {
+        let Some(remote) = new_conf.get_mut(&remote_name) else {
+            continue; // The remote no longer exists in the re-read config.
+        };
+
+        for node in &mut nodes {
+            for n in remote.nodes.iter_mut() {
+                if node.hostname == n.hostname && node.fingerprint == n.fingerprint {
+                    // At this point we know that a) a remote by the same name with b) a node
+                    // with the same hostname and fingerprint still exists -> update its staged
+                    // fingerprints.
+                    log::info!(
+                        "Got new staged fingerprints for remote {} and node {}, updating...",
+                        remote.id,
+                        n.hostname
+                    );
+                    n.staged_fingerprints = node.staged_fingerprints.take();
+                    changed = true;
+                }
+            }
+        }
+    }
+
+    // All pending updates turned out to be stale, so there is nothing to persist.
+    if !changed {
+        return Ok(());
+    }
+
+    pdm_config::remotes::save_config(new_conf)
+}
+
+/// Fetch the fingerprints of all certificates a PVE node reports, except the currently pinned
+/// one and the cluster CA certificate.
+///
+/// `current_fp` is compared case-insensitively against the reported fingerprints. Returns
+/// `Ok(None)` if no other fingerprint is left, otherwise `Ok(Some(..))` with the parsed
+/// fingerprints in the order the node reported them.
+///
+/// # Errors
+///
+/// Fails if the client cannot be created, the certificate info request fails or a reported
+/// fingerprint cannot be parsed.
+async fn fetch_staged_fps_from_pve_remote(
+    remote: &Remote,
+    node: &NodeUrl,
+    current_fp: &str,
+) -> Result<Option<Vec<Fingerprint>>, Error> {
+    // Query each node by connecting to it directly instead of specifying the node
+    // name in the request. The hostname in the remotes config may not match the
+    // node name, this avoids such a mismatch issue.
+    let client = make_pve_client_with_endpoint(remote, Some(&node.hostname))?;
+    let mut certificates = client.certificates_info("localhost").await?;
+    let active_fp = current_fp.to_lowercase();
+
+    let mut staged = Vec::new();
+    for cert in certificates.iter_mut() {
+        let Some(fp) = cert.fingerprint.take() else {
+            continue;
+        };
+
+        // Stage all but the current or ca certificates.
+        if active_fp == fp.to_lowercase() || cert.filename == "pve-root-ca.pem" {
+            continue;
+        }
+
+        staged.push(fp.parse::<Fingerprint>()?);
+    }
+
+    Ok((!staged.is_empty()).then_some(staged))
+}
-- 
2.47.3





      parent reply	other threads:[~2026-06-11 12:03 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11 12:03 [RFC cluster/datacenter-manager/manager/proxmox 00/17] TLS Certificate Staging Shannon Sterz
2026-06-11 12:03 ` [PATCH cluster 01/17] setup: allow caller to provide the certificate filename Shannon Sterz
2026-06-11 12:03 ` [PATCH manager 02/17] bin/api: add a new staged certificate when renewing self-signed cert Shannon Sterz
2026-06-11 12:03 ` [PATCH manager 03/17] api: certificates: if node parameter is 'localhost' return local certs Shannon Sterz
2026-06-11 12:03 ` [PATCH proxmox 04/17] client: ignore certificate trust store validation result on fp option Shannon Sterz
2026-06-11 12:03 ` [PATCH proxmox 05/17] pve-api-types: expose certificates info endpoint Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 06/17] client: don't short-circuit on valid certificate when tls fp exists Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 07/17] client: allow users to update a changed fingerprint interactively Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 08/17] cli/api-types: move Fingerprint to common api type crate Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 09/17] server: connection: report mismatching fingerprint as untrusted on probe Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 10/17] ui: wizzard: add context if a provided fingerprint did not match remote Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 11/17] ui: wizzard: nodes page: always update fingerprints on user confirmation Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 12/17] pdm-api-types: implement ApiType for Fingerprint Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 13/17] pdm-api-types: add staged_fingerprints field to NodeUrl Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 14/17] server: remotes: lock remotes config when updating it Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 15/17] server: connection: rotate in staged fingerprints when encountering them Shannon Sterz
2026-06-11 12:03 ` [PATCH datacenter-manager 16/17] server: api: tasks: move `spawn_aborted_on_shutdown()` to super module Shannon Sterz
2026-06-11 12:03 ` Shannon Sterz [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260611120327.257523-18-s.sterz@proxmox.com \
    --to=s.sterz@proxmox.com \
    --cc=pdm-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal