From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 88BC61FF144 for ; Tue, 24 Mar 2026 19:32:42 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 037331AAEA; Tue, 24 Mar 2026 19:31:32 +0100 (CET) From: Daniel Kral To: pve-devel@lists.proxmox.com Subject: [PATCH perl-rs v2 16/40] pve-rs: resource-scheduling: expose auto rebalancing methods Date: Tue, 24 Mar 2026 19:30:00 +0100 Message-ID: <20260324183029.1274972-17-d.kral@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260324183029.1274972-1-d.kral@proxmox.com> References: <20260324183029.1274972-1-d.kral@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1774376988601 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.093 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment POISEN_SPAM_PILL 0.1 Meta: its spam POISEN_SPAM_PILL_1 0.1 random spam to be learned in bayes POISEN_SPAM_PILL_3 0.1 random spam to be learned in bayes SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: MMZSX5PKFBR2FREUWPIUMQGHWGC4YI46 X-Message-ID-Hash: MMZSX5PKFBR2FREUWPIUMQGHWGC4YI46 X-MailFrom: d.kral@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: These 
methods expose the auto rebalancing methods of both the static and dynamic scheduler. As Scheduler::score_best_balancing_migration_candidates{,_topsis}() takes a possibly very large list of migration candidates, the binding takes a more compact representation, which reduces the size that needs to be generated on the caller's side and therefore the runtime of the serialization from Perl to Rust. Additionally, while decomposing the compact representation the input data is validated since the underlying scoring methods do not further validate whether their input is consistent with the cluster usage. The method names score_best_balancing_migration_candidates{,_topsis}() are chosen deliberately, so that future extensions can implement score_best_balancing_migrations{,_topsis}(), which might allow scoring migrations without providing the candidates. Signed-off-by: Daniel Kral --- changes v1 -> v2: - improve patch message and documentation - move to the end of the perl-rs changes, which makes it more consistent with the change order in pve-ha-manager as well - uses `UsageAggregator` now to discern how usages are accumulated - s/generate_migration_candidates_from/decompose_compact_migration_candidates - make the decomposition of compact migration candidates more robust and do not use any unwraps or other causes of panic but the Mutex guard unwrap .../resource_scheduling/pve_dynamic.rs | 57 +++++++++++- .../resource_scheduling/pve_static.rs | 56 +++++++++++- .../bindings/resource_scheduling/resource.rs | 88 ++++++++++++++++++- .../src/bindings/resource_scheduling/usage.rs | 15 ++++ 4 files changed, 211 insertions(+), 5 deletions(-) diff --git a/pve-rs/src/bindings/resource_scheduling/pve_dynamic.rs b/pve-rs/src/bindings/resource_scheduling/pve_dynamic.rs index 5b4373e..26f36d1 100644 --- a/pve-rs/src/bindings/resource_scheduling/pve_dynamic.rs +++ b/pve-rs/src/bindings/resource_scheduling/pve_dynamic.rs @@ -14,10 +14,15 @@ pub mod pve_rs_resource_scheduling_dynamic { use 
perlmod::Value; use proxmox_resource_scheduling::node::NodeStats; use proxmox_resource_scheduling::resource::ResourceStats; + use proxmox_resource_scheduling::scheduler::ScoredMigration; use proxmox_resource_scheduling::usage::Usage; - use crate::bindings::resource_scheduling::resource::PveResource; - use crate::bindings::resource_scheduling::usage::StartingAsStartedResourceAggregator; + use crate::bindings::resource_scheduling::resource::{ + CompactMigrationCandidate, PveResource, decompose_compact_migration_candidates, + }; + use crate::bindings::resource_scheduling::usage::{ + IdentityAggregator, StartingAsStartedResourceAggregator, + }; perlmod::declare_magic!(Box : &Scheduler as "PVE::RS::ResourceScheduling::Dynamic"); @@ -157,6 +162,54 @@ pub mod pve_rs_resource_scheduling_dynamic { usage.remove_resource(sid); } + /// Method: Returns the load imbalance among the nodes. + /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::node_imbalance`]. + #[export] + pub fn calculate_node_imbalance(#[try_from_ref] this: &Scheduler) -> f64 { + let usage = this.inner.lock().unwrap(); + + usage.to_scheduler::().node_imbalance() + } + + /// Method: Scores the given migration `candidates` by the best node imbalance improvement with + /// exhaustive search. + /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_best_balancing_migration_candidates`]. + #[export] + pub fn score_best_balancing_migration_candidates( + #[try_from_ref] this: &Scheduler, + candidates: Vec, + limit: usize, + ) -> Result, Error> { + let usage = this.inner.lock().unwrap(); + + let candidates = decompose_compact_migration_candidates(&usage, candidates)?; + + Ok(usage + .to_scheduler::() + .score_best_balancing_migration_candidates(candidates, limit)) + } + + /// Method: Scores the given migration `candidates` by the best node imbalance improvement with + /// the TOPSIS method. 
+ /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_best_balancing_migration_candidates_topsis`]. + #[export] + pub fn score_best_balancing_migration_candidates_topsis( + #[try_from_ref] this: &Scheduler, + candidates: Vec, + limit: usize, + ) -> Result, Error> { + let usage = this.inner.lock().unwrap(); + + let candidates = decompose_compact_migration_candidates(&usage, candidates)?; + + usage + .to_scheduler::() + .score_best_balancing_migration_candidates_topsis(&candidates, limit) + } + /// Method: Scores nodes to start a resource with the usage statistics `resource_stats` on. /// /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_nodes_to_start_resource`]. diff --git a/pve-rs/src/bindings/resource_scheduling/pve_static.rs b/pve-rs/src/bindings/resource_scheduling/pve_static.rs index e2756db..7924889 100644 --- a/pve-rs/src/bindings/resource_scheduling/pve_static.rs +++ b/pve-rs/src/bindings/resource_scheduling/pve_static.rs @@ -14,10 +14,14 @@ pub mod pve_rs_resource_scheduling_static { use perlmod::Value; use proxmox_resource_scheduling::node::NodeStats; use proxmox_resource_scheduling::resource::ResourceStats; + use proxmox_resource_scheduling::scheduler::ScoredMigration; use proxmox_resource_scheduling::usage::Usage; use crate::bindings::resource_scheduling::{ - resource::PveResource, usage::StartedResourceAggregator, + resource::{ + CompactMigrationCandidate, PveResource, decompose_compact_migration_candidates, + }, + usage::StartedResourceAggregator, }; perlmod::declare_magic!(Box : &Scheduler as "PVE::RS::ResourceScheduling::Static"); @@ -154,6 +158,56 @@ pub mod pve_rs_resource_scheduling_static { usage.remove_resource(sid); } + /// Method: Returns the load imbalance among the nodes. + /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::node_imbalance`]. 
+ #[export] + pub fn calculate_node_imbalance(#[try_from_ref] this: &Scheduler) -> f64 { + let usage = this.inner.lock().unwrap(); + + usage + .to_scheduler::() + .node_imbalance() + } + + /// Method: Scores the given migration `candidates` by the best node imbalance improvement with + /// exhaustive search. + /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_best_balancing_migration_candidates`]. + #[export] + pub fn score_best_balancing_migration_candidates( + #[try_from_ref] this: &Scheduler, + candidates: Vec, + limit: usize, + ) -> Result, Error> { + let usage = this.inner.lock().unwrap(); + + let candidates = decompose_compact_migration_candidates(&usage, candidates)?; + + Ok(usage + .to_scheduler::() + .score_best_balancing_migration_candidates(candidates, limit)) + } + + /// Method: Scores the given migration `candidates` by the best node imbalance improvement with + /// the TOPSIS method. + /// + /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_best_balancing_migration_candidates_topsis`]. + #[export] + pub fn score_best_balancing_migration_candidates_topsis( + #[try_from_ref] this: &Scheduler, + candidates: Vec, + limit: usize, + ) -> Result, Error> { + let usage = this.inner.lock().unwrap(); + + let candidates = decompose_compact_migration_candidates(&usage, candidates)?; + + usage + .to_scheduler::() + .score_best_balancing_migration_candidates_topsis(&candidates, limit) + } + /// Method: Scores nodes to start a service with the usage statistics `service_stats` on. /// /// See [`proxmox_resource_scheduling::scheduler::Scheduler::score_nodes_to_start_resource`]. 
diff --git a/pve-rs/src/bindings/resource_scheduling/resource.rs b/pve-rs/src/bindings/resource_scheduling/resource.rs index 91d56b9..9186d5b 100644 --- a/pve-rs/src/bindings/resource_scheduling/resource.rs +++ b/pve-rs/src/bindings/resource_scheduling/resource.rs @@ -1,6 +1,8 @@ use anyhow::{Error, bail}; -use proxmox_resource_scheduling::resource::{ - Resource, ResourcePlacement, ResourceState, ResourceStats, +use proxmox_resource_scheduling::{ + resource::{Resource, ResourcePlacement, ResourceState, ResourceStats}, + scheduler::{Migration, MigrationCandidate}, + usage::Usage, }; use serde::{Deserialize, Serialize}; @@ -42,3 +44,85 @@ impl> TryFrom> for Resource { Ok(Resource::new(resource.stats.into(), state, placement)) } } + +/// A compact representation of [`proxmox_resource_scheduling::scheduler::MigrationCandidate`]. +#[derive(Serialize, Deserialize)] +pub struct CompactMigrationCandidate { + /// The identifier of the leading resource. + pub leader: String, + /// The resources which are part of the leading resource's bundle. + pub resources: Vec, + /// The nodes, which are possible to migrate to for the resources. + pub nodes: Vec, +} + +/// Transforms a `Vec<CompactMigrationCandidate>` to a `Vec<MigrationCandidate>` with the cluster 
+/// +/// This function fails for any of the following conditions for a [`CompactMigrationCandidate`]: +/// +/// - the `leader` is not present in the cluster usage +/// - the `leader` is non-stationary +/// - any resource in `resources` is not present in the cluster usage +/// - any resource in `resources` is non-stationary +/// - any resource in `resources` is on another node than the `leader` +pub(crate) fn decompose_compact_migration_candidates( + usage: &Usage, + compact_candidates: Vec, +) -> Result, Error> { + // The length of `compact_candidates` is at least a lower bound + let mut candidates = Vec::with_capacity(compact_candidates.len()); + + for candidate in compact_candidates.into_iter() { + let leader_sid = candidate.leader; + let leader = match usage.get_resource(&leader_sid) { + Some(resource) => resource, + _ => bail!("leader '{leader_sid}' is not present in the cluster usage"), + }; + let leader_node = match leader.placement() { + ResourcePlacement::Stationary { current_node } => current_node, + _ => bail!("leader '{leader_sid}' is non-stationary"), + }; + + if !candidate.resources.contains(&leader_sid) { + bail!("leader '{leader_sid}' is not present in the resources list"); + } + + let mut resource_stats = Vec::with_capacity(candidate.resources.len()); + + for sid in candidate.resources.iter() { + let resource = match usage.get_resource(sid) { + Some(resource) => resource, + _ => bail!("resource '{sid}' is not present in the cluster usage"), + }; + + match resource.placement() { + ResourcePlacement::Stationary { current_node } => { + if current_node != leader_node { + bail!("resource '{sid}' is on other node than leader"); + } + + resource_stats.push(resource.stats()); + } + _ => bail!("resource '{sid}' is non-stationary"), + } + } + + let bundle_stats = resource_stats.into_iter().sum(); + + for target_node in candidate.nodes.into_iter() { + let migration = Migration { + sid: leader_sid.to_string(), + source_node: leader_node.to_string(), + 
target_node, + }; + + candidates.push(MigrationCandidate { + migration, + stats: bundle_stats, + }); + } + } + + Ok(candidates) +} diff --git a/pve-rs/src/bindings/resource_scheduling/usage.rs b/pve-rs/src/bindings/resource_scheduling/usage.rs index 87b7e3e..48f6e84 100644 --- a/pve-rs/src/bindings/resource_scheduling/usage.rs +++ b/pve-rs/src/bindings/resource_scheduling/usage.rs @@ -4,6 +4,21 @@ use proxmox_resource_scheduling::{ usage::{Usage, UsageAggregator}, }; +/// The identity aggregator, which passes the node stats as-is. +pub(crate) struct IdentityAggregator; + +impl UsageAggregator for IdentityAggregator { + fn aggregate(usage: &Usage) -> Vec { + usage + .nodes_iter() + .map(|(nodename, node)| NodeUsage { + name: nodename.to_string(), + stats: node.stats(), + }) + .collect() + } +} + /// An aggregator, which adds any resource as a started resource. /// /// This aggregator is useful if the node base stats do not have any current usage. -- 2.47.3