From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH perl-rs 1/1] pve-rs: resource_scheduling: allow granular usage changes
Date: Tue, 30 Sep 2025 16:19:10 +0200 [thread overview]
Message-ID: <20250930142021.366529-4-d.kral@proxmox.com> (raw)
In-Reply-To: <20250930142021.366529-1-d.kral@proxmox.com>
Implements a simple bidirectional map to track which service usages have
been added to nodes, so that these can be removed later individually.
The StaticServiceUsage is added to the HashMap<> in StaticNodeInfo to
reduce unnecessary indirection when summing these values in
score_nodes_to_start_service(...).
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
I started out adding and removing the service_usage in StaticNodeUsage
on each add_service_usage_to_node(...) and remove_service_usage(...)
call individually, but I think summing those up every time is better for
numerical stability..
.../bindings/resource_scheduling_static.rs | 98 ++++++++++++++++---
pve-rs/test/resource_scheduling.pl | 84 ++++++++++++++--
2 files changed, 160 insertions(+), 22 deletions(-)
diff --git a/pve-rs/src/bindings/resource_scheduling_static.rs b/pve-rs/src/bindings/resource_scheduling_static.rs
index 7cf2f35..535dfe3 100644
--- a/pve-rs/src/bindings/resource_scheduling_static.rs
+++ b/pve-rs/src/bindings/resource_scheduling_static.rs
@@ -16,8 +16,16 @@ pub mod pve_rs_resource_scheduling_static {
perlmod::declare_magic!(Box<Scheduler> : &Scheduler as "PVE::RS::ResourceScheduling::Static");
+ struct StaticNodeInfo {
+ name: String,
+ maxcpu: usize,
+ maxmem: usize,
+ services: HashMap<String, StaticServiceUsage>,
+ }
+
struct Usage {
- nodes: HashMap<String, StaticNodeUsage>,
+ nodes: HashMap<String, StaticNodeInfo>,
+ service_nodes: HashMap<String, Vec<String>>,
}
/// A scheduler instance contains the resource usage by node.
@@ -30,6 +38,7 @@ pub mod pve_rs_resource_scheduling_static {
pub fn new(#[raw] class: Value) -> Result<Value, Error> {
let inner = Usage {
nodes: HashMap::new(),
+ service_nodes: HashMap::new(),
};
Ok(perlmod::instantiate_magic!(
@@ -39,7 +48,7 @@ pub mod pve_rs_resource_scheduling_static {
/// Method: Add a node with its basic CPU and memory info.
///
- /// This inserts a [`StaticNodeUsage`] entry for the node into the scheduler instance.
+ /// This inserts a [`StaticNodeInfo`] entry for the node into the scheduler instance.
#[export]
pub fn add_node(
#[try_from_ref] this: &Scheduler,
@@ -53,12 +62,11 @@ pub mod pve_rs_resource_scheduling_static {
bail!("node {} already added", nodename);
}
- let node = StaticNodeUsage {
+ let node = StaticNodeInfo {
name: nodename.clone(),
- cpu: 0.0,
maxcpu,
- mem: 0,
maxmem,
+ services: HashMap::new(),
};
usage.nodes.insert(nodename, node);
@@ -67,10 +75,25 @@ pub mod pve_rs_resource_scheduling_static {
/// Method: Remove a node from the scheduler.
#[export]
- pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) {
+ pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) -> Result<(), Error> {
let mut usage = this.inner.lock().unwrap();
- usage.nodes.remove(nodename);
+ if let Some(node) = usage.nodes.remove(nodename) {
+ for (sid, _) in node.services.iter() {
+ match usage.service_nodes.get_mut(sid) {
+ Some(nodes) => {
+ nodes.retain(|node| !node.eq(nodename));
+ }
+ None => bail!(
+ "service '{}' not present while removing node '{}'",
+ sid,
+ nodename
+ ),
+ }
+ }
+ }
+
+ Ok(())
}
/// Method: Get a list of all the nodes in the scheduler.
@@ -93,22 +116,55 @@ pub mod pve_rs_resource_scheduling_static {
usage.nodes.contains_key(nodename)
}
- /// Method: Add usage of `service` to the node's usage.
+ /// Method: Add service `sid` and its `service_usage` to the node.
#[export]
pub fn add_service_usage_to_node(
#[try_from_ref] this: &Scheduler,
nodename: &str,
- service: StaticServiceUsage,
+ sid: &str,
+ service_usage: StaticServiceUsage,
) -> Result<(), Error> {
let mut usage = this.inner.lock().unwrap();
match usage.nodes.get_mut(nodename) {
Some(node) => {
- node.add_service_usage(&service);
- Ok(())
+ if node.services.contains_key(sid) {
+ bail!("service '{}' already added to node '{}'", sid, nodename);
+ }
+
+ node.services.insert(sid.to_string(), service_usage);
}
None => bail!("node '{}' not present in usage hashmap", nodename),
}
+
+ if let Some(nodes) = usage.service_nodes.get_mut(sid) {
+ nodes.push(nodename.to_string());
+ } else {
+ usage
+ .service_nodes
+ .insert(sid.to_string(), vec![nodename.to_string()]);
+ }
+
+ Ok(())
+ }
+
+ /// Method: Remove service `sid` and its usage from all assigned nodes.
+ #[export]
+ fn remove_service_usage(#[try_from_ref] this: &Scheduler, sid: &str) -> Result<(), Error> {
+ let mut usage = this.inner.lock().unwrap();
+
+ if let Some(nodes) = usage.service_nodes.remove(sid) {
+ for nodename in &nodes {
+ match usage.nodes.get_mut(nodename) {
+ Some(node) => {
+ node.services.remove(sid);
+ }
+ None => bail!("service '{}' not present on node '{}'", sid, nodename),
+ }
+ }
+ }
+
+ Ok(())
}
/// Scores all previously added nodes for starting a `service` on.
@@ -126,7 +182,25 @@ pub mod pve_rs_resource_scheduling_static {
service: StaticServiceUsage,
) -> Result<Vec<(String, f64)>, Error> {
let usage = this.inner.lock().unwrap();
- let nodes = usage.nodes.values().collect::<Vec<&StaticNodeUsage>>();
+ let nodes = usage
+ .nodes
+ .values()
+ .map(|node| {
+ let mut node_usage = StaticNodeUsage {
+ name: node.name.to_string(),
+ cpu: 0.0,
+ maxcpu: node.maxcpu,
+ mem: 0,
+ maxmem: node.maxmem,
+ };
+
+ for service in node.services.values() {
+ node_usage.add_service_usage(service);
+ }
+
+ node_usage
+ })
+ .collect::<Vec<StaticNodeUsage>>();
proxmox_resource_scheduling::pve_static::score_nodes_to_start_service(&nodes, &service)
}
diff --git a/pve-rs/test/resource_scheduling.pl b/pve-rs/test/resource_scheduling.pl
index e3b7d2e..318238e 100755
--- a/pve-rs/test/resource_scheduling.pl
+++ b/pve-rs/test/resource_scheduling.pl
@@ -7,6 +7,20 @@ use Test::More;
use PVE::RS::ResourceScheduling::Static;
+my sub score_nodes {
+ my ($static, $service) = @_;
+
+ my $score_list = $static->score_nodes_to_start_service($service);
+
+ # imitate HA manager
+ my $scores = { map { $_->[0] => -$_->[1] } $score_list->@* };
+ my @nodes = sort {
+ $scores->{$a} <=> $scores->{$b} || $a cmp $b
+ } keys $scores->%*;
+
+ return @nodes;
+}
+
sub test_basic {
my $static = PVE::RS::ResourceScheduling::Static->new();
is(scalar($static->list_nodes()->@*), 0, 'node list empty');
@@ -33,6 +47,7 @@ sub test_balance {
maxmem => 20_000_000_000,
};
+ my @sid_names = ("a" .. "z");
for (my $i = 0; $i < 15; $i++) {
my $score_list = $static->score_nodes_to_start_service($service);
@@ -50,7 +65,54 @@ sub test_balance {
is($nodes[1], "A", 'second should be A');
}
- $static->add_service_usage_to_node($nodes[0], $service);
+ $static->add_service_usage_to_node($nodes[0], $sid_names[$i], $service);
+ }
+}
+
+sub test_balance_removal {
+ my $static = PVE::RS::ResourceScheduling::Static->new();
+ $static->add_node("A", 10, 100_000_000_000);
+ $static->add_node("B", 20, 200_000_000_000);
+ $static->add_node("C", 30, 300_000_000_000);
+
+ my $service = {
+ maxcpu => 4,
+ maxmem => 20_000_000_000,
+ };
+
+ $static->add_service_usage_to_node("A", "a", $service);
+ $static->add_service_usage_to_node("A", "b", $service);
+ $static->add_service_usage_to_node("B", "c", $service);
+ $static->add_service_usage_to_node("B", "d", $service);
+ $static->add_service_usage_to_node("C", "c", $service);
+
+ {
+ my @nodes = score_nodes($static, $service);
+
+ is($nodes[0], "C");
+ is($nodes[1], "B");
+ is($nodes[2], "A");
+ }
+
+ $static->remove_service_usage("d");
+ $static->remove_service_usage("c");
+ $static->add_service_usage_to_node("C", "c", $service);
+
+ {
+ my @nodes = score_nodes($static, $service);
+
+ is($nodes[0], "B");
+ is($nodes[1], "C");
+ is($nodes[2], "A");
+ }
+
+ $static->remove_node("B");
+
+ {
+ my @nodes = score_nodes($static, $service);
+
+ is($nodes[0], "C");
+ is($nodes[1], "A");
}
}
@@ -66,11 +128,11 @@ sub test_overcommitted {
maxmem => 536_870_912,
};
- $static->add_service_usage_to_node("A", $service);
- $static->add_service_usage_to_node("A", $service);
- $static->add_service_usage_to_node("A", $service);
- $static->add_service_usage_to_node("B", $service);
- $static->add_service_usage_to_node("A", $service);
+ $static->add_service_usage_to_node("A", "a", $service);
+ $static->add_service_usage_to_node("A", "b", $service);
+ $static->add_service_usage_to_node("A", "c", $service);
+ $static->add_service_usage_to_node("B", "d", $service);
+ $static->add_service_usage_to_node("A", "e", $service);
my $score_list = $static->score_nodes_to_start_service($service);
@@ -96,9 +158,9 @@ sub test_balance_small_memory_difference {
$static->add_node("C", 4, 8_000_000_000);
if ($with_start_load) {
- $static->add_service_usage_to_node("A", { maxcpu => 4, maxmem => 1_000_000_000 });
- $static->add_service_usage_to_node("B", { maxcpu => 2, maxmem => 1_000_000_000 });
- $static->add_service_usage_to_node("C", { maxcpu => 2, maxmem => 1_000_000_000 });
+ $static->add_service_usage_to_node("A", "a", { maxcpu => 4, maxmem => 1_000_000_000 });
+ $static->add_service_usage_to_node("B", "b", { maxcpu => 2, maxmem => 1_000_000_000 });
+ $static->add_service_usage_to_node("C", "c", { maxcpu => 2, maxmem => 1_000_000_000 });
}
my $service = {
@@ -106,6 +168,7 @@ sub test_balance_small_memory_difference {
maxmem => 16_000_000,
};
+ my @sid_names = ("d" .. "z");
for (my $i = 0; $i < 20; $i++) {
my $score_list = $static->score_nodes_to_start_service($service);
@@ -131,12 +194,13 @@ sub test_balance_small_memory_difference {
die "internal error, got $i % 4 == " . ($i % 4) . "\n";
}
- $static->add_service_usage_to_node($nodes[0], $service);
+ $static->add_service_usage_to_node($nodes[0], $sid_names[$i], $service);
}
}
test_basic();
test_balance();
+test_balance_removal();
test_overcommitted();
test_balance_small_memory_difference(1);
test_balance_small_memory_difference(0);
--
2.47.3
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-09-30 14:21 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-30 14:19 [pve-devel] [RFC ha-manager/perl-rs/proxmox/qemu-server 00/12] Granular online_node_usage accounting Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH qemu-server 1/1] config: only fetch necessary default values in get_derived_property helper Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH proxmox 1/1] resource-scheduling: change score_nodes_to_start_service signature Daniel Kral
2025-09-30 14:19 ` Daniel Kral [this message]
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 1/9] implement static service stats cache Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 2/9] manager: remove redundant recompute_online_node_usage from next_state_recovery Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 3/9] manager: remove redundant add_service_usage_to_node " Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 4/9] manager: remove redundant add_service_usage_to_node from next_state_started Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 5/9] rules: resource affinity: decouple get_resource_affinity helper from Usage class Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 6/9] manager: make recompute_online_node_usage use get_service_nodes helper Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 7/9] usage: allow granular changes to Usage implementations Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 8/9] manager: make online node usage computation granular Daniel Kral
2025-09-30 14:19 ` [pve-devel] [PATCH ha-manager 9/9] manager: make service node usage computation more granular Daniel Kral
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250930142021.366529-4-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.