From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 1E4D61FF187 for ; Mon, 20 Oct 2025 18:46:49 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4380DDE5C; Mon, 20 Oct 2025 18:46:23 +0200 (CEST) From: Daniel Kral To: pve-devel@lists.proxmox.com Date: Mon, 20 Oct 2025 18:45:29 +0200 Message-ID: <20251020164540.517231-4-d.kral@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20251020164540.517231-1-d.kral@proxmox.com> References: <20251020164540.517231-1-d.kral@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1760978738381 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.015 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH perl-rs v2 1/2] pve-rs: resource_scheduling: allow granular usage changes X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" Implements a simple bidirectional map to track which service usages have been added to nodes, so that these can be removed later individually. The `StaticNodeUsage` is newly initialized on every invocation of score_nodes_to_start_service(...) instead of updating the values on every call of `add_service_usage_to_node(...)` to reduce the likelihood of introducing numerical instability caused by floating-point operations done on the `cpu` field. The StaticServiceUsage is added to the HashMap<> in StaticNodeInfo to reduce unnecessary indirection when summing these values in score_nodes_to_start_service(...). Signed-off-by: Daniel Kral Reviewed-by: Fiona Ebner --- Needs a build dependency bump for librust-proxmox-resource-scheduling-dev and a versioned breaks for pve-ha-manager. changes since v1: - added numerical stability bit to patch message - changed service_nodes type from HashMap> to HashMap> and changed relevant methods accordingly - added consistency check in add_service_usage_to_node(...) for the other way around too (whether service_nodes contains the node already) - improve error messages - use vm:{seqnum} instead of single letters .../bindings/resource_scheduling_static.rs | 108 +++++++++++++++--- pve-rs/test/resource_scheduling.pl | 82 +++++++++++-- 2 files changed, 167 insertions(+), 23 deletions(-) diff --git a/pve-rs/src/bindings/resource_scheduling_static.rs b/pve-rs/src/bindings/resource_scheduling_static.rs index 7cf2f35..5b91d36 100644 --- a/pve-rs/src/bindings/resource_scheduling_static.rs +++ b/pve-rs/src/bindings/resource_scheduling_static.rs @@ -6,7 +6,7 @@ pub mod pve_rs_resource_scheduling_static { //! //! See [`proxmox_resource_scheduling`]. - use std::collections::HashMap; + use std::collections::{HashMap, HashSet}; use std::sync::Mutex; use anyhow::{Error, bail}; @@ -16,8 +16,16 @@ pub mod pve_rs_resource_scheduling_static { perlmod::declare_magic!(Box : &Scheduler as "PVE::RS::ResourceScheduling::Static"); + struct StaticNodeInfo { + name: String, + maxcpu: usize, + maxmem: usize, + services: HashMap, + } + struct Usage { - nodes: HashMap, + nodes: HashMap, + service_nodes: HashMap>, } /// A scheduler instance contains the resource usage by node. @@ -30,6 +38,7 @@ pub mod pve_rs_resource_scheduling_static { pub fn new(#[raw] class: Value) -> Result { let inner = Usage { nodes: HashMap::new(), + service_nodes: HashMap::new(), }; Ok(perlmod::instantiate_magic!( @@ -39,7 +48,7 @@ pub mod pve_rs_resource_scheduling_static { /// Method: Add a node with its basic CPU and memory info. /// - /// This inserts a [`StaticNodeUsage`] entry for the node into the scheduler instance. + /// This inserts a [`StaticNodeInfo`] entry for the node into the scheduler instance. #[export] pub fn add_node( #[try_from_ref] this: &Scheduler, @@ -53,12 +62,11 @@ pub mod pve_rs_resource_scheduling_static { bail!("node {} already added", nodename); } - let node = StaticNodeUsage { + let node = StaticNodeInfo { name: nodename.clone(), - cpu: 0.0, maxcpu, - mem: 0, maxmem, + services: HashMap::new(), }; usage.nodes.insert(nodename, node); @@ -67,10 +75,25 @@ pub mod pve_rs_resource_scheduling_static { /// Method: Remove a node from the scheduler. #[export] - pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) { + pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) -> Result<(), Error> { let mut usage = this.inner.lock().unwrap(); - usage.nodes.remove(nodename); + if let Some(node) = usage.nodes.remove(nodename) { + for (sid, _) in node.services.iter() { + match usage.service_nodes.get_mut(sid) { + Some(service_nodes) => { + service_nodes.remove(nodename); + } + None => bail!( + "service '{}' not present in service_nodes hashmap while removing node '{}'", + sid, + nodename + ), + } + } + } + + Ok(()) } /// Method: Get a list of all the nodes in the scheduler. @@ -93,22 +116,63 @@ pub mod pve_rs_resource_scheduling_static { usage.nodes.contains_key(nodename) } - /// Method: Add usage of `service` to the node's usage. + /// Method: Add service `sid` and its `service_usage` to the node. #[export] pub fn add_service_usage_to_node( #[try_from_ref] this: &Scheduler, nodename: &str, - service: StaticServiceUsage, + sid: &str, + service_usage: StaticServiceUsage, ) -> Result<(), Error> { let mut usage = this.inner.lock().unwrap(); match usage.nodes.get_mut(nodename) { Some(node) => { - node.add_service_usage(&service); - Ok(()) + if node.services.contains_key(sid) { + bail!("service '{}' already added to node '{}'", sid, nodename); + } + + node.services.insert(sid.to_string(), service_usage); } None => bail!("node '{}' not present in usage hashmap", nodename), } + + if let Some(service_nodes) = usage.service_nodes.get_mut(sid) { + if service_nodes.contains(nodename) { + bail!("node '{}' already added to service '{}'", nodename, sid); + } + + service_nodes.insert(nodename.to_string()); + } else { + let mut service_nodes = HashSet::new(); + service_nodes.insert(nodename.to_string()); + usage.service_nodes.insert(sid.to_string(), service_nodes); + } + + Ok(()) + } + + /// Method: Remove service `sid` and its usage from all assigned nodes. + #[export] + fn remove_service_usage(#[try_from_ref] this: &Scheduler, sid: &str) -> Result<(), Error> { + let mut usage = this.inner.lock().unwrap(); + + if let Some(nodes) = usage.service_nodes.remove(sid) { + for nodename in &nodes { + match usage.nodes.get_mut(nodename) { + Some(node) => { + node.services.remove(sid); + } + None => bail!( + "service '{}' not present in usage hashmap on node '{}'", + sid, + nodename + ), + } + } + } + + Ok(()) } /// Scores all previously added nodes for starting a `service` on. @@ -126,7 +190,25 @@ pub mod pve_rs_resource_scheduling_static { service: StaticServiceUsage, ) -> Result, Error> { let usage = this.inner.lock().unwrap(); - let nodes = usage.nodes.values().collect::>(); + let nodes = usage + .nodes + .values() + .map(|node| { + let mut node_usage = StaticNodeUsage { + name: node.name.to_string(), + cpu: 0.0, + maxcpu: node.maxcpu, + mem: 0, + maxmem: node.maxmem, + }; + + for service in node.services.values() { + node_usage.add_service_usage(service); + } + + node_usage + }) + .collect::>(); proxmox_resource_scheduling::pve_static::score_nodes_to_start_service(&nodes, &service) } diff --git a/pve-rs/test/resource_scheduling.pl b/pve-rs/test/resource_scheduling.pl index e3b7d2e..42556bd 100755 --- a/pve-rs/test/resource_scheduling.pl +++ b/pve-rs/test/resource_scheduling.pl @@ -7,6 +7,20 @@ use Test::More; use PVE::RS::ResourceScheduling::Static; +my sub score_nodes { + my ($static, $service) = @_; + + my $score_list = $static->score_nodes_to_start_service($service); + + # imitate HA manager + my $scores = { map { $_->[0] => -$_->[1] } $score_list->@* }; + my @nodes = sort { + $scores->{$a} <=> $scores->{$b} || $a cmp $b + } keys $scores->%*; + + return @nodes; +} + sub test_basic { my $static = PVE::RS::ResourceScheduling::Static->new(); is(scalar($static->list_nodes()->@*), 0, 'node list empty'); @@ -50,7 +64,54 @@ sub test_balance { is($nodes[1], "A", 'second should be A'); } - $static->add_service_usage_to_node($nodes[0], $service); + $static->add_service_usage_to_node($nodes[0], "vm:" . (100 + $i), $service); + } +} + +sub test_balance_removal { + my $static = PVE::RS::ResourceScheduling::Static->new(); + $static->add_node("A", 10, 100_000_000_000); + $static->add_node("B", 20, 200_000_000_000); + $static->add_node("C", 30, 300_000_000_000); + + my $service = { + maxcpu => 4, + maxmem => 20_000_000_000, + }; + + $static->add_service_usage_to_node("A", "a", $service); + $static->add_service_usage_to_node("A", "b", $service); + $static->add_service_usage_to_node("B", "c", $service); + $static->add_service_usage_to_node("B", "d", $service); + $static->add_service_usage_to_node("C", "c", $service); + + { + my @nodes = score_nodes($static, $service); + + is($nodes[0], "C"); + is($nodes[1], "B"); + is($nodes[2], "A"); + } + + $static->remove_service_usage("d"); + $static->remove_service_usage("c"); + $static->add_service_usage_to_node("C", "c", $service); + + { + my @nodes = score_nodes($static, $service); + + is($nodes[0], "B"); + is($nodes[1], "C"); + is($nodes[2], "A"); + } + + $static->remove_node("B"); + + { + my @nodes = score_nodes($static, $service); + + is($nodes[0], "C"); + is($nodes[1], "A"); } } @@ -66,11 +127,11 @@ sub test_overcommitted { maxmem => 536_870_912, }; - $static->add_service_usage_to_node("A", $service); - $static->add_service_usage_to_node("A", $service); - $static->add_service_usage_to_node("A", $service); - $static->add_service_usage_to_node("B", $service); - $static->add_service_usage_to_node("A", $service); + $static->add_service_usage_to_node("A", "a", $service); + $static->add_service_usage_to_node("A", "b", $service); + $static->add_service_usage_to_node("A", "c", $service); + $static->add_service_usage_to_node("B", "d", $service); + $static->add_service_usage_to_node("A", "e", $service); my $score_list = $static->score_nodes_to_start_service($service); @@ -96,9 +157,9 @@ sub test_balance_small_memory_difference { $static->add_node("C", 4, 8_000_000_000); if ($with_start_load) { - $static->add_service_usage_to_node("A", { maxcpu => 4, maxmem => 1_000_000_000 }); - $static->add_service_usage_to_node("B", { maxcpu => 2, maxmem => 1_000_000_000 }); - $static->add_service_usage_to_node("C", { maxcpu => 2, maxmem => 1_000_000_000 }); + $static->add_service_usage_to_node("A", "vm:100", { maxcpu => 4, maxmem => 1_000_000_000 }); + $static->add_service_usage_to_node("B", "vm:101", { maxcpu => 2, maxmem => 1_000_000_000 }); + $static->add_service_usage_to_node("C", "vm:102", { maxcpu => 2, maxmem => 1_000_000_000 }); } my $service = { @@ -131,12 +192,13 @@ sub test_balance_small_memory_difference { die "internal error, got $i % 4 == " . ($i % 4) . "\n"; } - $static->add_service_usage_to_node($nodes[0], $service); + $static->add_service_usage_to_node($nodes[0], "vm:" . (103 + $i), $service); } } test_basic(); test_balance(); +test_balance_removal(); test_overcommitted(); test_balance_small_memory_difference(1); test_balance_small_memory_difference(0); -- 2.47.3 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel