From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id CD88C1FF13C for ; Thu, 30 Apr 2026 09:48:09 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id A6A141D526; Thu, 30 Apr 2026 09:48:09 +0200 (CEST) Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=UTF-8 Date: Thu, 30 Apr 2026 09:48:03 +0200 Message-Id: Subject: Re: [PATCH proxmox v2 1/6] resource-scheduling: clamp imbalance value to unit interval From: "Daniel Kral" To: "Dominik Rusovac" , X-Mailer: aerc 0.21.0-136-gdb9fe9896a79-dirty References: <20260429122051.179485-1-d.rusovac@proxmox.com> <20260429122051.179485-2-d.rusovac@proxmox.com> In-Reply-To: <20260429122051.179485-2-d.rusovac@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1777535184824 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.078 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: BXJOVZSXSLUHWS2RFBCZTHIVT2QLNSD5 X-Message-ID-Hash: BXJOVZSXSLUHWS2RFBCZTHIVT2QLNSD5 X-MailFrom: d.kral@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On Wed Apr 29, 2026 at 2:20 PM CEST, Dominik Rusovac wrote: > The currently used 
load imbalance value is given as the so-called > coefficient of variation (CV), a value that may exceed 1. As such, the > CV value alone lacks meaning. A CV value of 0.0 means no imbalance, but > what does a value of, say, 1.7 mean? > > Relative to the number of nodes in a cluster, it is possible to > determine the upper bound of the CV value [0][1]. By dividing the CV > value by its upper bound, the load imbalance can be represented as a > value that varies between 0 and 1. Expressing the CV as a percentage > makes the concept of load imbalance easier to interpret. > > Re-adjust hardcoded imbalance values within tests accordingly. > > [0] https://repositorio.ipbeja.pt/server/api/core/bitstreams/8ed9a444-dbe= 0-402f-9d2f-90c5bf6e418c/content > [1] https://stats.stackexchange.com/questions/18621/maximum-value-of-coef= ficient-of-variation-for-bounded-data-set > > Signed-off-by: Dominik Rusovac > --- > > Notes: > changes since v1: > * squash commit that re-adjusts tests into this one > * back to multiple `as f64` casts of node_count variable > * go from if-else to early-return > * make comment above early return clause more explanatory > * re-order cv and max_cv bindings > * add comment with ref relating to computation of max_cv > > proxmox-resource-scheduling/src/scheduler.rs | 47 +++++++++++-------- > .../tests/scheduler.rs | 8 ++-- > 2 files changed, 32 insertions(+), 23 deletions(-) > > diff --git a/proxmox-resource-scheduling/src/scheduler.rs b/proxmox-resou= rce-scheduling/src/scheduler.rs > index 49d16f9f..87eccfee 100644 > --- a/proxmox-resource-scheduling/src/scheduler.rs > +++ b/proxmox-resource-scheduling/src/scheduler.rs > @@ -17,34 +17,43 @@ pub struct NodeUsage { > pub stats: NodeStats, > } > =20 > -/// Returns the load imbalance among the nodes. > +/// Returns the load imbalance among the nodes, which is a value between= 0 and 1 that describes the > +/// statistical dispersion of the individual node loads around the mean = node load. 
The lower the > +/// value, the better. > /// > -/// The load balance is measured as the statistical dispersion of the in= dividual node loads. > -/// > -/// The current implementation uses the dimensionless coefficient of var= iation, which expresses the > -/// standard deviation in relation to the average mean of the node loads= . > -/// > -/// The coefficient of variation is not robust, which is a desired prope= rty here, because outliers > -/// should be detected as much as possible. > +/// In more detail, the current implementation computes the so-called co= efficient of variation (CV), > +/// which is the ratio of the standard deviation to the mean of the give= n node loads. The lower > +/// bound of the CV is reached if all node loads are equal. The upper bo= und is reached if all nodes > +/// except one are idle. To present the CV as a value between 0 and 1, i= t's being divided by the > +/// upper bound of the CV for the given number of nodes. > fn calculate_node_imbalance(nodes: &[NodeUsage], to_load: impl Fn(&NodeU= sage) -> f64) -> f64 { > let node_count =3D nodes.len(); > - let node_loads =3D nodes.iter().map(to_load).collect::>(); > =20 > + // early return with perfect imbalance to avoid division by zero > + if node_count < 2 { > + return 0.0; > + } > + > + let node_loads =3D nodes.iter().map(to_load).collect::>(); > let load_sum =3D node_loads.iter().sum::(); > =20 > - // load_sum is guaranteed to be -0.0 for empty `nodes` > + // early return with perfect imbalance to avoid division by zero > if load_sum =3D=3D 0.0 { > - 0.0 > - } else { > - let load_mean =3D load_sum / node_count as f64; > + return 0.0; > + } > =20 > - let squared_diff_sum =3D node_loads > - .iter() > - .fold(0.0, |sum, node_load| sum + (node_load - load_mean).po= wi(2)); > - let load_sd =3D (squared_diff_sum / node_count as f64).sqrt(); > + let load_mean =3D load_sum / node_count as f64; > + let squared_diff_sum =3D node_loads > + .iter() > + .fold(0.0, |sum, node_load| sum + 
(node_load - load_mean).powi(2= )); > + let load_sd =3D (squared_diff_sum / node_count as f64).sqrt(); > =20 > - load_sd / load_mean > - } > + let cv =3D load_sd / load_mean; > + =20 Small whitespace error here: the added line above contains only trailing whitespace. This could also be fixed on apply, though. > + // https://stats.stackexchange.com/questions/18621 > + let max_cv =3D ((node_count - 1) as f64).sqrt(); > + > + cv / max_cv > } > =20 > criteria_struct! { > diff --git a/proxmox-resource-scheduling/tests/scheduler.rs b/proxmox-res= ource-scheduling/tests/scheduler.rs > index be90e4f9..21dbe451 100644 > --- a/proxmox-resource-scheduling/tests/scheduler.rs > +++ b/proxmox-resource-scheduling/tests/scheduler.rs > @@ -172,7 +172,7 @@ fn test_score_best_balancing_migration_candidates_wit= h_no_candidates() { > fn test_score_best_balancing_migration_candidates_in_homogeneous_cluster= () { > let scheduler =3D new_homogeneous_cluster_scheduler(); > =20 > - assert_imbalance(scheduler.node_imbalance(), 0.4893954724628247); > + assert_imbalance(scheduler.node_imbalance(), 0.3460548572604576); > =20 > let (candidates, migration1, migration2) =3D new_simple_migration_ca= ndidates(); > =20 > @@ -186,7 +186,7 @@ fn test_score_best_balancing_migration_candidates_in_= homogeneous_cluster() { > fn test_score_best_balancing_migration_candidates_in_heterogeneous_clust= er() { > let scheduler =3D new_heterogeneous_cluster_scheduler(); > =20 > - assert_imbalance(scheduler.node_imbalance(), 0.33026013056867354); > + assert_imbalance(scheduler.node_imbalance(), 0.23352917788066363); > =20 > let (candidates, migration1, migration2) =3D new_simple_migration_ca= ndidates(); > =20 > @@ -225,7 +225,7 @@ fn test_score_best_balancing_migration_candidates_top= sis_in_homogeneous_cluster( > ) -> Result<(), Error> { > let scheduler =3D new_homogeneous_cluster_scheduler(); > =20 > - assert_imbalance(scheduler.node_imbalance(), 0.4893954724628247); > + assert_imbalance(scheduler.node_imbalance(), 0.3460548572604576); > =20 > let (candidates, migration1, 
migration2) =3D new_simple_migration_ca= ndidates(); > =20 > @@ -242,7 +242,7 @@ fn test_score_best_balancing_migration_candidates_top= sis_in_heterogeneous_cluste > ) -> Result<(), Error> { > let scheduler =3D new_heterogeneous_cluster_scheduler(); > =20 > - assert_imbalance(scheduler.node_imbalance(), 0.33026013056867354); > + assert_imbalance(scheduler.node_imbalance(), 0.23352917788066363); > =20 > let (candidates, migration1, migration2) =3D new_simple_migration_ca= ndidates(); > =20