From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id B3EC71FF13C for ; Thu, 11 Jun 2026 14:03:55 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 951263A79; Thu, 11 Jun 2026 14:03:52 +0200 (CEST) From: Shannon Sterz To: pdm-devel@lists.proxmox.com Subject: [PATCH datacenter-manager 17/17] server: bin: api: tasks: add task to discover new staged certificates Date: Thu, 11 Jun 2026 14:03:27 +0200 Message-ID: <20260611120327.257523-18-s.sterz@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260611120327.257523-1-s.sterz@proxmox.com> References: <20260611120327.257523-1-s.sterz@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1781179365079 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.108 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: SDGVCBYNJUOI6AFO47IMCKVPZV3VEH7H X-Message-ID-Hash: SDGVCBYNJUOI6AFO47IMCKVPZV3VEH7H X-MailFrom: s.sterz@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox Datacenter Manager development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: so pdm stays in the loop if a remote's certificate changes. 
Signed-off-by: Shannon Sterz --- server/src/bin/proxmox-datacenter-api/main.rs | 1 + .../bin/proxmox-datacenter-api/tasks/mod.rs | 1 + .../tasks/remote_staged_fingerprints.rs | 149 ++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs diff --git a/server/src/bin/proxmox-datacenter-api/main.rs b/server/src/bin/proxmox-datacenter-api/main.rs index 6e915b67..43c40651 100644 --- a/server/src/bin/proxmox-datacenter-api/main.rs +++ b/server/src/bin/proxmox-datacenter-api/main.rs @@ -335,6 +335,7 @@ async fn run(debug: bool) -> Result<(), Error> { tasks::remote_tasks::start_task()?; tasks::remote_updates::start_task()?; tasks::ceph_detection::start_task(); + tasks::remote_staged_fingerprints::start_task(); server.await?; log::info!("server shutting down, waiting for active workers to complete"); diff --git a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs index 2a05e9b3..968a6dc2 100644 --- a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs +++ b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs @@ -2,6 +2,7 @@ pub mod logrotate; pub mod ceph_detection; pub mod remote_node_mapping; +pub mod remote_staged_fingerprints; pub mod remote_tasks; pub mod remote_updates; diff --git a/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs new file mode 100644 index 00000000..9299b657 --- /dev/null +++ b/server/src/bin/proxmox-datacenter-api/tasks/remote_staged_fingerprints.rs @@ -0,0 +1,149 @@ +//! Periodically query remotes for staged fingerprints. +//! +//! Nodes of remotes that have a specified fingerprint are probed regularly to see if a new staged +//! certificate was issued. This should allow us to keep TLS connections secure without manual user +//! intervention even across certificate rotation. 
+
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+
+use anyhow::Error;
+
+use proxmox_schema::PropertyString;
+
+use pdm_api_types::Fingerprint;
+use pdm_api_types::remotes::{NodeUrl, Remote, RemoteType};
+
+use server::connection::make_pve_client_with_endpoint;
+use server::task_utils;
+
+// The daily update task renewing certificates on a remote runs once a day by default, so scan it
+// twice a day to pick up on a new certificate as early as possible.
+const TASK_INTERVAL: u64 = 12 * 60 * 60;
+
+/// Start the periodic staged fingerprint query by handing the task loop to
+/// `super::spawn_aborted_on_shutdown`.
+pub fn start_task() {
+    super::spawn_aborted_on_shutdown(run());
+}
+
+/// Task loop: run one query pass, then sleep until the instant returned by
+/// `task_utils::next_aligned_instant(TASK_INTERVAL)` and start over.
+async fn run() {
+    loop {
+        run_once().await;
+        let delay_target = task_utils::next_aligned_instant(TASK_INTERVAL);
+        tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
+    }
+}
+
+/// Run a single query pass.
+///
+/// Errors are logged as warnings instead of being propagated, so a failed pass does not end the
+/// task loop.
+#[tracing::instrument(skip_all, name = "update_staged_fingerprints")]
+async fn run_once() {
+    if let Err(err) = query_staged_fps().await {
+        log::warn!("Could not query remotes for new staged fingerprints: {err:#}");
+    }
+}
+
+/// Query the nodes of all PVE remotes that have a fingerprint configured and store staged
+/// fingerprints that differ from the configured ones.
+///
+/// This works in two phases: first all nodes are queried without holding the configuration lock,
+/// then the lock is taken and the collected changes are applied to a freshly read configuration.
+/// The configuration is only locked if at least one node reported a change and only written if a
+/// node entry was actually modified.
+///
+/// # Errors
+///
+/// Returns an error if the remotes configuration cannot be read, locked or saved. A failure to
+/// query an individual node is only logged and that node is skipped.
+async fn query_staged_fps() -> Result<(), Error> {
+    // First check if and what we need to update.
+    let (config, _) = pdm_config::remotes::config()?;
+    let mut updates: HashMap<String, Vec<PropertyString<NodeUrl>>> = HashMap::new();
+
+    for (remote_name, remote) in config {
+        match remote.ty {
+            RemoteType::Pve => {
+                for node in &remote.nodes {
+                    let Some(fp) = node.fingerprint.as_ref() else {
+                        continue; // Node uses the system's trust store, skip querying staged certs.
+                    };
+
+                    let new_fps = match fetch_staged_fps_from_pve_remote(&remote, node, fp).await {
+                        Ok(res) => res,
+                        Err(e) => {
+                            log::warn!(
+                                "Could not get staged fingerprints of node '{}' of remote \
+                                '{remote_name}': {e:#}",
+                                node.hostname
+                            );
+                            continue;
+                        }
+                    };
+
+                    if node.staged_fingerprints != new_fps {
+                        let new_node = NodeUrl {
+                            hostname: node.hostname.to_owned(),
+                            fingerprint: Some(fp.to_owned()),
+                            staged_fingerprints: new_fps,
+                        };
+
+                        match updates.entry(remote_name.clone()) {
+                            Entry::Occupied(mut e) => e.get_mut().push(new_node.into()),
+                            Entry::Vacant(e) => {
+                                e.insert(vec![new_node.into()]);
+                            }
+                        }
+                    }
+                }
+            }
+            RemoteType::Pbs => {
+                log::debug!("PBS remotes don't rotate certificates, skipping '{remote_name}'.");
+            }
+        }
+    }
+
+    // No node reported a change, so there is no reason to lock or rewrite the configuration.
+    if updates.is_empty() {
+        return Ok(());
+    }
+
+    // Only then lock the configuration, re-check if the remote node we want to update still
+    // matches the expected state and then update it.
+    let _lock = pdm_config::remotes::lock_config()?;
+    let (mut new_conf, _) = pdm_config::remotes::config()?;
+    let mut changed = false;
+
+    for (remote_name, mut nodes) in updates {
+        let Some(remote) = new_conf.get_mut(&remote_name) else {
+            continue; // The remote was removed while we were querying it.
+        };
+
+        for node in &mut nodes {
+            for n in remote.nodes.iter_mut() {
+                if node.hostname == n.hostname && node.fingerprint == n.fingerprint {
+                    // At this point we know that a) a remote by the same name with b) a node
+                    // with the same hostname and fingerprint still exists -> update its staged
+                    // fingerprints.
+                    log::info!(
+                        "Got new staged fingerprints for remote {} and node {}, updating...",
+                        remote.id,
+                        n.hostname
+                    );
+                    n.staged_fingerprints = node.staged_fingerprints.take();
+                    changed = true;
+                }
+            }
+        }
+    }
+
+    // The configuration changed in the meantime and none of the updates apply anymore.
+    if !changed {
+        return Ok(());
+    }
+
+    pdm_config::remotes::save_config(new_conf)
+}
+
+/// Fetch the fingerprints of the certificates a PVE node reports, leaving out the one matching
+/// `current_fp` (compared case-insensitively) and the `pve-root-ca.pem` certificate.
+///
+/// Returns `Ok(None)` if no fingerprint is left after filtering, `Ok(Some(..))` otherwise.
+///
+/// # Errors
+///
+/// Returns an error if the client cannot be created, the certificate info request fails or a
+/// reported fingerprint cannot be parsed.
+async fn fetch_staged_fps_from_pve_remote(
+    remote: &Remote,
+    node: &NodeUrl,
+    current_fp: &str,
+) -> Result<Option<Vec<Fingerprint>>, Error> {
+    // Query each node by connecting to it directly instead of specifying the node
+    // name in the request. The hostname in the remotes config may not match the
+    // node name, this avoids such a mismatch issue.
+    let client = make_pve_client_with_endpoint(remote, Some(&node.hostname))?;
+    let mut certificates = client.certificates_info("localhost").await?;
+    let current_fp = current_fp.to_lowercase();
+
+    let new_staged_fps = certificates
+        .iter_mut()
+        .filter_map(|c| {
+            if let Some(fp) = c.fingerprint.take() {
+                // Stage all but the current or ca certificates.
+                if current_fp != fp.to_lowercase() && c.filename != "pve-root-ca.pem" {
+                    return Some(fp.parse::<Fingerprint>());
+                }
+            }
+
+            None
+        })
+        .collect::<Result<Vec<Fingerprint>, Error>>()?;
+
+    if new_staged_fps.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(new_staged_fps))
+    }
+}
-- 
2.47.3