From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [RFC perl-rs 4/6] pve-rs: resource scheduling: expose auto rebalancing methods
Date: Tue, 17 Feb 2026 15:14:03 +0100 [thread overview]
Message-ID: <20260217141437.584852-10-d.kral@proxmox.com> (raw)
In-Reply-To: <20260217141437.584852-1-d.kral@proxmox.com>
In the current implementation, the caller of
{score,select}_best_balancing_migration provides the migration
candidates, which are unpacked with generate_migration_candidates_from().
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
.../bindings/resource_scheduling_static.rs | 150 +++++++++++++++++-
1 file changed, 148 insertions(+), 2 deletions(-)
diff --git a/pve-rs/src/bindings/resource_scheduling_static.rs b/pve-rs/src/bindings/resource_scheduling_static.rs
index 3764aaa..84a5497 100644
--- a/pve-rs/src/bindings/resource_scheduling_static.rs
+++ b/pve-rs/src/bindings/resource_scheduling_static.rs
@@ -9,11 +9,15 @@ pub mod pve_rs_resource_scheduling_static {
use std::collections::{HashMap, HashSet};
use std::sync::Mutex;
- use anyhow::{Error, bail};
+ use serde::{Deserialize, Serialize};
+
+ use anyhow::{Context, Error, bail};
use perlmod::Value;
use proxmox_resource_scheduling::pve_static::{StaticNodeUsage, StaticServiceUsage};
- use proxmox_resource_scheduling::scheduler::ClusterUsage;
+ use proxmox_resource_scheduling::scheduler::{
+ ClusterUsage, MigrationCandidate, ScoredMigration,
+ };
perlmod::declare_magic!(Box<Scheduler> : &Scheduler as "PVE::RS::ResourceScheduling::Static");
@@ -208,6 +212,148 @@ pub mod pve_rs_resource_scheduling_static {
ClusterUsage::from_nodes(nodes)
}
+    /// Method: Returns the load of every node known to the scheduler.
+    ///
+    /// Locks the scheduler's usage data, builds the cluster usage from it with
+    /// `as_cluster_usage` and forwards the result of its `node_loads()` call.
+    ///
+    /// Each tuple is presumably `(node name, load)` - confirm against
+    /// `ClusterUsage::node_loads`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn calculate_node_loads(#[try_from_ref] this: &Scheduler) -> Vec<(String, f64)> {
+        let guard = this.inner.lock().unwrap();
+        // Keep the cluster usage in a local so it is dropped before the lock guard.
+        let cluster = as_cluster_usage(&guard);
+
+        cluster.node_loads()
+    }
+
+    /// Method: Returns the imbalance among the nodes known to the scheduler.
+    ///
+    /// Locks the scheduler's usage data, builds the cluster usage from it with
+    /// `as_cluster_usage` and forwards the result of its `node_imbalance()` call.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn calculate_node_imbalance(#[try_from_ref] this: &Scheduler) -> f64 {
+        let guard = this.inner.lock().unwrap();
+        // Keep the cluster usage in a local so it is dropped before the lock guard.
+        let cluster = as_cluster_usage(&guard);
+
+        cluster.node_imbalance()
+    }
+
+ /// A compact representation of `MigrationCandidate`.
+ ///
+ /// One value describes a bundle of services together with all of its possible target nodes.
+ /// `generate_migration_candidates_from` expands it into one `MigrationCandidate` per entry
+ /// in `nodes`.
+ #[derive(Serialize, Deserialize)]
+ pub struct CompactMigrationCandidate {
+ /// The identifier of the leading service; it is used as the `sid` of every expanded
+ /// `MigrationCandidate` and determines the source node.
+ pub leader: String,
+ /// The identifiers of the services which are part of the leading service's bundle; their
+ /// stats are summed up for the expanded candidates.
+ pub services: Vec<String>,
+ /// The nodes to which the services can possibly be migrated.
+ pub nodes: Vec<String>,
+ }
+
+    /// Expands compact candidates into one [`MigrationCandidate`] per target node.
+    ///
+    /// For every entry in `candidates`, the leader and all bundled services are looked up in
+    /// `usage.services`, the stats of the bundled services are summed up, and a
+    /// [`MigrationCandidate`] carrying the leader's id, the common source node and the summed
+    /// stats is emitted for each node in the entry's `nodes` list.
+    ///
+    /// # Errors
+    ///
+    /// Fails if
+    /// - the leader or any bundled service is not present in `usage.services`,
+    /// - the leader is not assigned to any node,
+    /// - a bundled service is on more than one node, or
+    /// - a bundled service is not on the leader's source node.
+    fn generate_migration_candidates_from(
+        usage: &Usage,
+        candidates: Vec<CompactMigrationCandidate>,
+    ) -> Result<Vec<MigrationCandidate>, Error> {
+        let mut migration_candidates = Vec::new();
+
+        for candidate in candidates {
+            let leader_sid = candidate.leader;
+            let leader = usage.services.get(&leader_sid).with_context(|| {
+                format!("leader {leader_sid} is not present in services usage hashmap")
+            })?;
+            // A leader without any node is reported as an error instead of panicking inside
+            // an exported binding.
+            let source_node = leader
+                .nodes
+                .iter()
+                .next()
+                .with_context(|| format!("leader {leader_sid} is not assigned to any node"))?;
+
+            // Validate every bundled service and sum up the bundle's stats in one pass.
+            let mut bundle_stats = StaticServiceUsage::default();
+
+            for sid in &candidate.services {
+                let service = usage
+                    .services
+                    .get(sid)
+                    .with_context(|| format!("service {sid} is not present in usage hashmap"))?;
+                let service_nodes = &service.nodes;
+
+                if service_nodes.len() > 1 {
+                    bail!("service {sid} is on multiple nodes");
+                }
+
+                if !service_nodes.contains(source_node) {
+                    bail!("service {sid} is not on common source node {source_node}");
+                }
+
+                bundle_stats = bundle_stats + service.stats;
+            }
+
+            // The whole bundle moves together, so each target node yields exactly one candidate.
+            for target_node in candidate.nodes {
+                migration_candidates.push(MigrationCandidate {
+                    sid: leader_sid.clone(),
+                    source_node: source_node.to_string(),
+                    target_node,
+                    stats: bundle_stats.into(),
+                });
+            }
+        }
+
+        Ok(migration_candidates)
+    }
+
+    /// Method: Scores the given service motions by their node imbalance improvement using
+    /// exhaustive search.
+    ///
+    /// The compact `candidates` are first expanded with `generate_migration_candidates_from`.
+    /// The expanded candidates and `limit` are then handed to the cluster usage's
+    /// `score_best_balancing_migrations`; `limit` is presumably the maximum number of
+    /// returned migrations - confirm against the scheduler crate.
+    ///
+    /// Returns an error if the candidates cannot be expanded or if scoring fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn score_best_balancing_migrations(
+        #[try_from_ref] this: &Scheduler,
+        candidates: Vec<CompactMigrationCandidate>,
+        limit: usize,
+    ) -> Result<Vec<ScoredMigration>, Error> {
+        let guard = this.inner.lock().unwrap();
+
+        let cluster = as_cluster_usage(&guard);
+        let expanded = generate_migration_candidates_from(&guard, candidates)?;
+
+        cluster.score_best_balancing_migrations(expanded, limit)
+    }
+
+    /// Method: Selects the service motion with the best node imbalance improvement using
+    /// exhaustive search.
+    ///
+    /// The compact `candidates` are first expanded with `generate_migration_candidates_from`
+    /// and then handed to the cluster usage's `select_best_balancing_migration`.
+    ///
+    /// Returns an error if the candidates cannot be expanded or if the selection fails.
+    /// The result is `None` when the cluster usage does not select any migration.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn select_best_balancing_migration(
+        #[try_from_ref] this: &Scheduler,
+        candidates: Vec<CompactMigrationCandidate>,
+    ) -> Result<Option<ScoredMigration>, Error> {
+        let guard = this.inner.lock().unwrap();
+
+        let cluster = as_cluster_usage(&guard);
+        let expanded = generate_migration_candidates_from(&guard, candidates)?;
+
+        cluster.select_best_balancing_migration(expanded)
+    }
+
+    /// Method: Scores the given service motions by their node imbalance improvement using the
+    /// TOPSIS method.
+    ///
+    /// The compact `candidates` are first expanded with `generate_migration_candidates_from`.
+    /// The expanded candidates are then passed by reference, together with `limit`, to the
+    /// cluster usage's `score_best_balancing_migrations_topsis`; `limit` is presumably the
+    /// maximum number of returned migrations - confirm against the scheduler crate.
+    ///
+    /// Returns an error if the candidates cannot be expanded or if scoring fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn score_best_balancing_migrations_topsis(
+        #[try_from_ref] this: &Scheduler,
+        candidates: Vec<CompactMigrationCandidate>,
+        limit: usize,
+    ) -> Result<Vec<ScoredMigration>, Error> {
+        let guard = this.inner.lock().unwrap();
+
+        let cluster = as_cluster_usage(&guard);
+        let expanded = generate_migration_candidates_from(&guard, candidates)?;
+
+        cluster.score_best_balancing_migrations_topsis(&expanded, limit)
+    }
+
+    /// Method: Selects the service motion with the best node imbalance improvement using the
+    /// TOPSIS method.
+    ///
+    /// The compact `candidates` are first expanded with `generate_migration_candidates_from`
+    /// and then passed by reference to the cluster usage's
+    /// `select_best_balancing_migration_topsis`.
+    ///
+    /// Returns an error if the candidates cannot be expanded or if the selection fails.
+    /// The result is `None` when the cluster usage does not select any migration.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the lock on the scheduler's usage data cannot be acquired.
+    #[export]
+    pub fn select_best_balancing_migration_topsis(
+        #[try_from_ref] this: &Scheduler,
+        candidates: Vec<CompactMigrationCandidate>,
+    ) -> Result<Option<ScoredMigration>, Error> {
+        let guard = this.inner.lock().unwrap();
+
+        let cluster = as_cluster_usage(&guard);
+        let expanded = generate_migration_candidates_from(&guard, candidates)?;
+
+        cluster.select_best_balancing_migration_topsis(&expanded)
+    }
+
/// Scores all previously added nodes for starting a `service` on.
///
/// Scoring is done according to the static memory and CPU usages of the nodes as if the
--
2.47.3
next prev parent reply other threads:[~2026-02-17 14:14 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-17 14:13 [RFC PATCH-SERIES many 00/36] dynamic scheduler + load rebalancer Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 1/5] resource-scheduling: move score_nodes_to_start_service to scheduler crate Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 2/5] resource-scheduling: introduce generic cluster usage implementation Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 3/5] resource-scheduling: add dynamic node and service stats Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 4/5] resource-scheduling: implement rebalancing migration selection Daniel Kral
2026-02-17 14:13 ` [RFC proxmox 5/5] resource-scheduling: implement Add and Default for {Dynamic,Static}ServiceStats Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 1/6] pve-rs: resource scheduling: use generic cluster usage implementation Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 2/6] pve-rs: resource scheduling: create service_nodes hashset from array Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 3/6] pve-rs: resource scheduling: store service stats independently of node Daniel Kral
2026-02-17 14:14 ` Daniel Kral [this message]
2026-02-17 14:14 ` [RFC perl-rs 5/6] pve-rs: resource scheduling: move pve_static into resource_scheduling module Daniel Kral
2026-02-17 14:14 ` [RFC perl-rs 6/6] pve-rs: resource scheduling: implement pve_dynamic bindings Daniel Kral
2026-02-17 14:14 ` [RFC cluster 1/2] datacenter config: add dynamic load scheduler option Daniel Kral
2026-02-18 11:06 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC cluster 2/2] datacenter config: add auto rebalancing options Daniel Kral
2026-02-18 11:15 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC ha-manager 01/21] rename static node stats to be consistent with similar interfaces Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 02/21] resources: remove redundant load_config fallback for static config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 03/21] remove redundant service_node and migration_target parameter Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 04/21] factor out common pve to ha resource type mapping Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 05/21] derive static service stats while filling the service stats repository Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 06/21] test: make static service usage explicit for all resources Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 07/21] make static service stats indexable by sid Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 08/21] move static service stats repository to PVE::HA::Usage::Static Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 09/21] usage: augment service stats with node and state information Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 10/21] include running non-HA resources in the scheduler's accounting Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 11/21] env, resources: add dynamic node and service stats abstraction Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 12/21] env: pve2: implement dynamic node and service stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 13/21] sim: hardware: pass correct types for static stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 14/21] sim: hardware: factor out static stats' default values Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 15/21] sim: hardware: rewrite set-static-stats Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 16/21] sim: hardware: add set-dynamic-stats for services Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 17/21] usage: add dynamic usage scheduler Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 18/21] manager: rename execute_migration to queue_resource_motion Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 19/21] manager: update_crs_scheduler_mode: factor out crs config Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 20/21] implement automatic rebalancing Daniel Kral
2026-02-17 14:14 ` [RFC ha-manager 21/21] test: add basic automatic rebalancing system test cases Daniel Kral
2026-02-17 14:14 ` [RFC manager 1/2] ui: dc/options: add dynamic load scheduler option Daniel Kral
2026-02-18 11:10 ` Maximiliano Sandoval
2026-02-17 14:14 ` [RFC manager 2/2] ui: dc/options: add auto rebalancing options Daniel Kral
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260217141437.584852-10-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox