From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 228BB1FF137 for ; Tue, 17 Feb 2026 15:16:05 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 8845936D9; Tue, 17 Feb 2026 15:15:20 +0100 (CET) From: Daniel Kral To: pve-devel@lists.proxmox.com Subject: [RFC proxmox 2/5] resource-scheduling: introduce generic cluster usage implementation Date: Tue, 17 Feb 2026 15:13:56 +0100 Message-ID: <20260217141437.584852-3-d.kral@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260217141437.584852-1-d.kral@proxmox.com> References: <20260217141437.584852-1-d.kral@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1771337676036 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.019 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: CAQMUQ6FGCRIW73DWCYFGVWZPJTYC2CI X-Message-ID-Hash: CAQMUQ6FGCRIW73DWCYFGVWZPJTYC2CI X-MailFrom: d.kral@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Declare generic NodeStats and ServiceStats structs, which special use cases convert their types into, and use these to implement generic scheduler methods such as 
the existing scoring of nodes to start a previously non-running service. This is best viewed with the git option --ignore-all-space. Signed-off-by: Daniel Kral <d.kral@proxmox.com> --- proxmox-resource-scheduling/src/pve_static.rs | 45 ++++- proxmox-resource-scheduling/src/scheduler.rs | 185 ++++++++++++------ 2 files changed, 166 insertions(+), 64 deletions(-) diff --git a/proxmox-resource-scheduling/src/pve_static.rs b/proxmox-resource-scheduling/src/pve_static.rs index 184e615d..b269c44f 100644 --- a/proxmox-resource-scheduling/src/pve_static.rs +++ b/proxmox-resource-scheduling/src/pve_static.rs @@ -1,9 +1,9 @@ use anyhow::Error; use serde::{Deserialize, Serialize}; -use crate::scheduler; +use crate::scheduler::{ClusterUsage, NodeStats, NodeUsage, ServiceStats}; -#[derive(Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] /// Static usage information of a node. pub struct StaticNodeUsage { @@ -33,9 +33,25 @@ impl AsRef<StaticNodeUsage> for StaticNodeUsage { } } +impl From<StaticNodeUsage> for NodeUsage { + fn from(value: StaticNodeUsage) -> Self { + let stats = NodeStats { + cpu: value.cpu, + maxcpu: value.maxcpu, + mem: value.mem, + maxmem: value.maxmem, + }; + + Self { + name: value.name, + stats, + } + } +} + /// Calculate new CPU usage in percent. /// `add` being `0.0` means "unlimited" and results in `max` being added. -pub fn add_cpu_usage(old: f64, max: f64, add: f64) -> f64 { +fn add_cpu_usage(old: f64, max: f64, add: f64) -> f64 { if add == 0.0 { old + max } else { @@ -43,7 +59,7 @@ pub fn add_cpu_usage(old: f64, max: f64, add: f64) -> f64 { } } -#[derive(Serialize, Deserialize)] +#[derive(Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] /// Static usage information of an HA resource. 
pub struct StaticServiceUsage { @@ -53,14 +69,33 @@ pub struct StaticServiceUsage { pub maxmem: usize, } +impl From<StaticServiceUsage> for ServiceStats { + fn from(value: StaticServiceUsage) -> Self { + Self { + cpu: value.maxcpu, + maxcpu: value.maxcpu, + mem: value.maxmem, + maxmem: value.maxmem, + } + } +} + /// Scores candidate `nodes` to start a `service` on. Scoring is done according to the static memory /// and CPU usages of the nodes as if the service would already be running on each. /// /// Returns a vector of (nodename, score) pairs. Scores are between 0.0 and 1.0 and a higher score /// is better. +#[deprecated] pub fn score_nodes_to_start_service<T: AsRef<StaticNodeUsage>>( nodes: &[T], service: &StaticServiceUsage, ) -> Result<Vec<(String, f64)>, Error> { - scheduler::score_nodes_to_start_service(nodes, service) + let nodes = nodes + .iter() + .map(|node| node.as_ref().clone().into()) + .collect::<Vec<NodeUsage>>(); + + let cluster_usage = ClusterUsage::from_nodes(nodes); + + cluster_usage.score_nodes_to_start_service(*service) } diff --git a/proxmox-resource-scheduling/src/scheduler.rs b/proxmox-resource-scheduling/src/scheduler.rs index 29353d84..58215f03 100644 --- a/proxmox-resource-scheduling/src/scheduler.rs +++ b/proxmox-resource-scheduling/src/scheduler.rs @@ -1,9 +1,66 @@ use anyhow::Error; -use crate::{ - pve_static::{add_cpu_usage, StaticNodeUsage, StaticServiceUsage}, - topsis, -}; +use crate::topsis; + +/// Generic service stats. +#[derive(Clone, Copy)] +pub struct ServiceStats { + /// CPU utilization in CPU cores. + pub cpu: f64, + /// Number of assigned CPUs or CPU limit. + pub maxcpu: f64, + /// Used memory in bytes. + pub mem: usize, + /// Maximum assigned memory in bytes. + pub maxmem: usize, +} + +/// Generic node stats. +#[derive(Clone, Copy)] +pub struct NodeStats { + /// CPU utilization in CPU cores. + pub cpu: f64, + /// Total number of CPU cores. + pub maxcpu: usize, + /// Used memory in bytes. + pub mem: usize, + /// Total memory in bytes. 
+ pub maxmem: usize, +} + +impl NodeStats { + /// Adds the service stats to the node stats as if the service has started on the node. + pub fn add_started_service(&mut self, service_stats: &ServiceStats) { + // a maxcpu value of `0.0` means no cpu usage limit on the node + let service_cpu = if service_stats.maxcpu == 0.0 { + self.maxcpu as f64 + } else { + service_stats.maxcpu + }; + + self.cpu += service_cpu; + self.mem += service_stats.maxmem; + } + + /// Returns the current cpu usage as a percentage. + pub fn cpu_load(&self) -> f64 { + self.cpu / self.maxcpu as f64 + } + + /// Returns the current memory usage as a percentage. + pub fn mem_load(&self) -> f64 { + self.mem as f64 / self.maxmem as f64 + } +} + +pub struct NodeUsage { + pub name: String, + pub stats: NodeStats, +} + +pub struct ClusterUsage { + nodes: Vec<NodeUsage>, +} criteria_struct! { /// A given alternative. @@ -22,65 +79,75 @@ criteria_struct! { static PVE_HA_TOPSIS_CRITERIA; } -/// Scores candidate `nodes` to start a `service` on. Scoring is done according to the static memory -/// and CPU usages of the nodes as if the service would already be running on each. -/// -/// Returns a vector of (nodename, score) pairs. Scores are between 0.0 and 1.0 and a higher score -/// is better. -pub fn score_nodes_to_start_service<T: AsRef<StaticNodeUsage>>( - nodes: &[T], - service: &StaticServiceUsage, -) -> Result<Vec<(String, f64)>, Error> { - let len = nodes.len(); +impl ClusterUsage { + /// Instantiate cluster usage from node usages. + pub fn from_nodes<I>(nodes: I) -> Self + where + I: IntoIterator<Item: Into<NodeUsage>>, + { + Self { + nodes: nodes.into_iter().map(|node| node.into()).collect(), + } + } - let matrix = nodes - .iter() - .enumerate() - .map(|(target_index, _)| { - // Base values on percentages to allow comparing nodes with different stats. - let mut highest_cpu = 0.0; - let mut squares_cpu = 0.0; - let mut highest_mem = 0.0; - let mut squares_mem = 0.0; + /// Scores candidate `nodes` to start a `service` on. 
Scoring is done according to the static memory + /// and CPU usages of the nodes as if the service would already be running on each. + /// + /// Returns a vector of (nodename, score) pairs. Scores are between 0.0 and 1.0 and a higher score + /// is better. + pub fn score_nodes_to_start_service<T: Into<ServiceStats>>( + &self, + service_stats: T, + ) -> Result<Vec<(String, f64)>, Error> { + let len = self.nodes.len(); + let service_stats = service_stats.into(); - for (index, node) in nodes.iter().enumerate() { - let node = node.as_ref(); - let new_cpu = if index == target_index { - add_cpu_usage(node.cpu, node.maxcpu as f64, service.maxcpu) - } else { - node.cpu - } / (node.maxcpu as f64); - highest_cpu = f64::max(highest_cpu, new_cpu); - squares_cpu += new_cpu.powi(2); + let matrix = self + .nodes + .iter() + .enumerate() + .map(|(target_index, _)| { + // Base values on percentages to allow comparing nodes with different stats. + let mut highest_cpu = 0.0; + let mut squares_cpu = 0.0; + let mut highest_mem = 0.0; + let mut squares_mem = 0.0; - let new_mem = if index == target_index { - node.mem + service.maxmem - } else { - node.mem - } as f64 - / node.maxmem as f64; - highest_mem = f64::max(highest_mem, new_mem); - squares_mem += new_mem.powi(2); - } + for (index, node) in self.nodes.iter().enumerate() { + let mut new_stats = node.stats; - // Add 1.0 to avoid boosting tiny differences: e.g. 0.004 is twice as much as 0.002, but - // 1.004 is only slightly more than 1.002. 
- PveTopsisAlternative { - average_cpu: 1.0 + (squares_cpu / len as f64).sqrt(), - highest_cpu: 1.0 + highest_cpu, - average_memory: 1.0 + (squares_mem / len as f64).sqrt(), - highest_memory: 1.0 + highest_mem, - } - .into() - }) - .collect::<Vec<_>>(); + if index == target_index { + new_stats.add_started_service(&service_stats) + }; - let scores = - topsis::score_alternatives(&topsis::Matrix::new(matrix)?, &PVE_HA_TOPSIS_CRITERIA)?; + let new_cpu = new_stats.cpu_load(); + highest_cpu = f64::max(highest_cpu, new_cpu); + squares_cpu += new_cpu.powi(2); - Ok(scores - .into_iter() - .enumerate() - .map(|(n, score)| (nodes[n].as_ref().name.clone(), score)) - .collect()) + let new_mem = new_stats.mem_load(); + highest_mem = f64::max(highest_mem, new_mem); + squares_mem += new_mem.powi(2); + } + + // Add 1.0 to avoid boosting tiny differences: e.g. 0.004 is twice as much as 0.002, but + // 1.004 is only slightly more than 1.002. + PveTopsisAlternative { + average_cpu: 1.0 + (squares_cpu / len as f64).sqrt(), + highest_cpu: 1.0 + highest_cpu, + average_memory: 1.0 + (squares_mem / len as f64).sqrt(), + highest_memory: 1.0 + highest_mem, + } + .into() + }) + .collect::<Vec<_>>(); + + let scores = + topsis::score_alternatives(&topsis::Matrix::new(matrix)?, &PVE_HA_TOPSIS_CRITERIA)?; + + Ok(scores + .into_iter() + .enumerate() + .map(|(n, score)| (self.nodes[n].name.to_string(), score)) + .collect()) + } } -- 2.47.3