From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pve-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
	by lore.proxmox.com (Postfix) with ESMTPS id 3CDCA1FF13E
	for <inbox@lore.proxmox.com>; Fri, 06 Mar 2026 09:20:02 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 2E79019FE3;
	Fri,  6 Mar 2026 09:21:04 +0100 (CET)
From: Dominik Rusovac <d.rusovac@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [RFC proxmox 3/3] resource-scheduling: evaluate static scheduler
 variants
Date: Fri,  6 Mar 2026 09:20:46 +0100
Message-ID: <20260306082046.34311-4-d.rusovac@proxmox.com>
X-Mailer: git-send-email 2.47.3
In-Reply-To: <20260306082046.34311-1-d.rusovac@proxmox.com>
References: <20260306082046.34311-1-d.rusovac@proxmox.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2
X-Bm-Transport-Timestamp: 1772785224717
X-SPAM-LEVEL: Spam detection results:  0
	AWL                     0.265 Adjusted score from AWL reputation of From:
 address
	BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
	DMARC_MISSING             0.1 Missing DMARC policy
	KAM_DMARC_STATUS         0.01 Test Rule for DKIM or SPF Failure with Strict
 Alignment
	RCVD_IN_MSPIKE_H2       0.001 Average reputation (+2)
	SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
	SPF_PASS               -0.001 SPF: sender matches SPF record
Message-ID-Hash: KE5DJ7P4GQ7QNXVXRNFQZIJKC6HVCUAI
X-Message-ID-Hash: KE5DJ7P4GQ7QNXVXRNFQZIJKC6HVCUAI
X-MailFrom: d.rusovac@proxmox.com
X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop;
 banned-address; emergency; member-moderation; nonmember-moderation;
 administrivia; implicit-dest; max-recipients; max-size; news-moderation;
 no-subject; digests; suspicious-header
X-Mailman-Version: 3.3.10
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Owner: <mailto:pve-devel-owner@lists.proxmox.com>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Subscribe: <mailto:pve-devel-join@lists.proxmox.com>
List-Unsubscribe: <mailto:pve-devel-leave@lists.proxmox.com>

This hacky diff was used to evaluate several variants of the static
scheduler. In particular, results of proptests were pretty printed and
compared in the command line.

Might come in handy for future experiments/evaluations.

Signed-off-by: Dominik Rusovac <d.rusovac@proxmox.com>
---
 proxmox-resource-scheduling/src/pve_static.rs |  60 ++-
 .../tests/pve_static.rs                       | 495 ++++++++++++++++++
 2 files changed, 542 insertions(+), 13 deletions(-)
 create mode 100644 proxmox-resource-scheduling/tests/pve_static.rs

diff --git a/proxmox-resource-scheduling/src/pve_static.rs b/proxmox-resource-scheduling/src/pve_static.rs
index 23e49656..f0c772c7 100644
--- a/proxmox-resource-scheduling/src/pve_static.rs
+++ b/proxmox-resource-scheduling/src/pve_static.rs
@@ -5,7 +5,7 @@ use crate::topsis;
 
 #[derive(Serialize, Deserialize)]
 #[serde(rename_all = "kebab-case")]
-#[cfg_attr(feature = "lab", derive(Debug))]
+#[cfg_attr(feature = "lab", derive(Debug, PartialEq))]
 /// Static usage information of a node.
 pub struct StaticNodeUsage {
     /// Hostname of the node.
@@ -249,6 +249,37 @@ pub mod evaluate {
         }
     }
 
+    pub fn imbalance(
+        target_index: usize,
+        nodes: &[StaticNodeUsage],
+        service: &StaticServiceUsage,
+    ) -> f64 {
+        let len = nodes.len();
+        let mut xs = vec![];
+
+        for (index, node) in nodes.iter().enumerate() {
+            let new_cpu = if index == target_index {
+                add_cpu_usage(node.cpu, node.maxcpu as f64, service.maxcpu)
+            } else {
+                node.cpu
+            } / (node.maxcpu as f64);
+
+            let new_mem = if index == target_index {
+                node.mem + service.maxmem
+            } else {
+                node.mem
+            } as f64
+                / node.maxmem as f64;
+
+            xs.push((new_cpu + new_mem) / 2.0);
+        }
+
+        let mean = xs.iter().sum::<f64>() / len as f64;
+        let sd = (xs.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / len as f64).sqrt();
+
+        sd / mean
+    }
+
     #[cfg(test)]
     mod base_variant_scores_like_present_rollout {
         use crate::{
@@ -270,23 +301,26 @@ pub mod evaluate {
         const MAX_CPU: usize = 32;
         const MAX_MEM: usize = 406;
 
-        fn node() -> impl Strategy<Value = StaticNodeUsage> {
-            (".*", 0.0..(MAX_CPU as f64), 0..MAX_MEM).prop_map(|(name, cpu, mem)| StaticNodeUsage {
-                name,
-                cpu,
-                maxcpu: MAX_CPU,
-                mem,
-                maxmem: MAX_MEM,
-            })
-        }
-
         fn service() -> impl Strategy<Value = StaticServiceUsage> {
-            (0.0..(MAX_CPU as f64), 0..MAX_MEM)
+            (1.0..(MAX_CPU as f64), 1..MAX_MEM)
                 .prop_map(|(maxcpu, maxmem)| StaticServiceUsage { maxcpu, maxmem })
         }
 
         fn nodes() -> impl Strategy<Value = Vec<StaticNodeUsage>> {
-            prop::collection::vec(node(), MIN_NODES..MAX_NODES)
+            prop::collection::vec((0.0..(MAX_CPU as f64), 0..MAX_MEM), MIN_NODES..MAX_NODES)
+                .prop_map(move |resources| {
+                    resources
+                        .into_iter()
+                        .enumerate()
+                        .map(|(i, (cpu, mem))| StaticNodeUsage {
+                            name: format!("node{i}"),
+                            cpu,
+                            maxcpu: MAX_CPU,
+                            mem,
+                            maxmem: MAX_MEM,
+                        })
+                        .collect()
+                })
         }
 
         fn test_case_err(err: impl ToString) -> TestCaseError {
diff --git a/proxmox-resource-scheduling/tests/pve_static.rs b/proxmox-resource-scheduling/tests/pve_static.rs
new file mode 100644
index 00000000..92f6d17b
--- /dev/null
+++ b/proxmox-resource-scheduling/tests/pve_static.rs
@@ -0,0 +1,495 @@
+#[cfg(feature = "lab")]
+mod skewed {
+    //! This test compares several variants on skewed node stats
+    use proxmox_resource_scheduling::{
+        pve_static::{
+            evaluate::{
+                imbalance, score_nodes_to_start_service_with_variant, StaticResourceScheduling,
+            },
+            StaticNodeUsage, StaticServiceUsage,
+        },
+        topsis::evaluate::Topsis,
+    };
+    use std::{fmt::Debug, sync::LazyLock};
+
+    use proptest::prelude::*;
+
+    fn print_table(
+        input: &[impl Evaluate],
+        nodes: &[StaticNodeUsage],
+        service: &StaticServiceUsage,
+    ) {
+        let delim = "  ";
+
+        let (result_names, results) = (
+            input.iter().map(|x| x.name()).collect::<Vec<_>>(),
+            input
+                .iter()
+                .map(|x| x.run(nodes, service))
+                .collect::<Vec<_>>(),
+        );
+
+        let (service_cpu, service_mem) = (service.maxcpu, service.maxmem);
+
+        eprintln!();
+        eprint!(
+            "{:<15}{delim}{:>10}{delim}{:>10}{delim}{:>10}{delim}{:>10}{delim}{:>10}",
+            "name",
+            format!("cpu+{service_cpu:.2}"),
+            "maxcpu",
+            format!("mem+{service_mem}"),
+            "maxmem",
+            "imbalance"
+        );
+        for x in &result_names {
+            eprint!("{delim}{:>10}", x.chars().take(10).collect::<String>());
+        }
+        eprintln!();
+
+        eprint!(
+            "{:-<15}{delim}{:->10}{delim}{:->10}{delim}{:->10}{delim}{:->10}{delim}{:->10}",
+            "", "", "", "", "", ""
+        );
+        for _ in result_names {
+            eprint!("{delim}{:->10}", "");
+        }
+        eprintln!();
+
+        let sorted_results = results
+            .iter()
+            .map(|result| {
+                let mut xs = result.to_vec();
+                xs.sort_by(|(_, a), (_, b)| b.total_cmp(a));
+                xs
+            })
+            .collect::<Vec<_>>();
+
+        let k = sorted_results.len();
+
+        nodes.iter().enumerate().for_each(|(i, n)| {
+            let pivot = &n.name;
+            let scores = sorted_results
+                .iter()
+                .filter_map(|result| result.iter().find(|(n, _)| n == pivot).map(|x| x.1))
+                .collect::<Vec<_>>();
+            let ranks = sorted_results
+                .iter()
+                .filter_map(|result| result.iter().position(|(n, _)| n == pivot).map(|x| x + 1))
+                .collect::<Vec<_>>();
+
+            if let Some(node) = nodes.iter().find(|n| n.name == *pivot) {
+                eprint!(
+                    "{:<15}{delim}{:>10.2}{delim}{:>10}{delim}{:>10}{delim}{:>10}{delim}{:>10.2}",
+                    node.name,
+                    node.cpu,
+                    node.maxcpu,
+                    node.mem,
+                    node.maxmem,
+                    imbalance(i, nodes, service)
+                );
+                for j in 0..k {
+                    let s = match ranks[j] {
+                        1 => format!("\x1B[30m\x1B[1;{}m{:.3}:1\x1B[0m", (41 + j) % 47, scores[j]),
+                        n => format!("\x1B[37m\x1B[0;{}m{:.3}:{n}\x1B[0m", 37, scores[j]),
+                    };
+                    eprint!("{delim}{s:>26}");
+                }
+                eprintln!();
+            }
+        });
+    }
+
+    static NODES: LazyLock<[StaticNodeUsage; 4]> = LazyLock::new(|| {
+        [
+            StaticNodeUsage {
+                name: "node1".to_owned(),
+                cpu: 2.0 + 32.0,
+                maxcpu: 32,
+                mem: 2 + 8,
+                maxmem: 406,
+            },
+            StaticNodeUsage {
+                name: "node2".to_owned(),
+                cpu: 2.0,
+                maxcpu: 32,
+                mem: 400,
+                maxmem: 406,
+            },
+            StaticNodeUsage {
+                name: "node4".to_owned(),
+                cpu: 2.0 + 32.0,
+                maxcpu: 32,
+                mem: 32 + 8,
+                maxmem: 406,
+            },
+            StaticNodeUsage {
+                name: "node5".to_owned(),
+                cpu: 2.0 + 32.0,
+                maxcpu: 32,
+                mem: 2 + 8,
+                maxmem: 406,
+            },
+        ]
+    });
+
+    static SERVICE: StaticServiceUsage = StaticServiceUsage {
+        maxcpu: 32.0,
+        maxmem: 21,
+    };
+
+    trait Evaluate {
+        fn run(
+            &self,
+            nodes: &[StaticNodeUsage],
+            service: &StaticServiceUsage,
+        ) -> Vec<(String, f64)>;
+        fn name(&self) -> String;
+    }
+
+    #[derive(Clone, Debug)]
+    enum Candidate {
+        Base,
+        MinMax,
+        MoreMaxCpu(usize),
+    }
+
+    impl Evaluate for Candidate {
+        fn run(
+            &self,
+            nodes: &[StaticNodeUsage],
+            service: &StaticServiceUsage,
+        ) -> Vec<(String, f64)> {
+            match self {
+                Self::Base => score_nodes_to_start_service_with_variant(
+                    nodes,
+                    service,
+                    None::<&Topsis>,
+                    None::<&StaticResourceScheduling>,
+                ),
+                Self::MinMax => score_nodes_to_start_service_with_variant(
+                    nodes,
+                    service,
+                    Some(&Topsis::MinMaxRelativeDistance),
+                    Some(&StaticResourceScheduling::Base),
+                ),
+                Self::MoreMaxCpu(n) => score_nodes_to_start_service_with_variant(
+                    nodes,
+                    service,
+                    None::<&Topsis>,
+                    Some(&StaticResourceScheduling::RmsNTimesMoreMaxcpu(*n)),
+                ),
+            }
+        }
+
+        fn name(&self) -> String {
+            match self {
+                Candidate::Base => "base".to_owned(),
+                Candidate::MinMax => "minmax".to_owned(),
+                Candidate::MoreMaxCpu(n) => format!("{n}xmaxcpu"),
+            }
+        }
+    }
+
+    #[test]
+    fn comparison_single() {
+        print_table(
+            &[
+                Candidate::Base,
+                Candidate::MinMax,
+                Candidate::MoreMaxCpu(2),
+                Candidate::MoreMaxCpu(8),
+                Candidate::MoreMaxCpu(10),
+                Candidate::MoreMaxCpu(35),
+            ],
+            NODES.as_slice(),
+            &SERVICE,
+        );
+    }
+
+    const REPEAT_N_TIMES: u32 = 300;
+
+    const MIN_NODES: usize = 2;
+    const MAX_NODES: usize = 40;
+
+    const MAX_CPU: usize = 32;
+    const MAX_MEM: usize = 406;
+
+    fn service() -> impl Strategy<Value = StaticServiceUsage> {
+        (1.0..(MAX_CPU as f64), 1..MAX_MEM)
+            .prop_map(|(maxcpu, maxmem)| StaticServiceUsage { maxcpu, maxmem })
+    }
+
+    fn nodes() -> impl Strategy<Value = Vec<StaticNodeUsage>> {
+        prop::collection::vec((0.0..(MAX_CPU as f64), 0..MAX_MEM), MIN_NODES..MAX_NODES).prop_map(
+            move |resources| {
+                resources
+                    .into_iter()
+                    .enumerate()
+                    .map(|(i, (cpu, mem))| StaticNodeUsage {
+                        name: format!("node{i}"),
+                        cpu,
+                        maxcpu: MAX_CPU,
+                        mem,
+                        maxmem: MAX_MEM,
+                    })
+                    .collect()
+            },
+        )
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig {
+            failure_persistence: None,
+            cases: REPEAT_N_TIMES,
+            ..ProptestConfig::default()
+        })]
+
+        //#[test]
+        fn minmax_is_not_last(nodes in nodes(), service in service()) {
+            let mut scores = vec![];
+
+            for c in [
+                Candidate::Base,
+                Candidate::MinMax,
+                Candidate::MoreMaxCpu(2),
+                Candidate::MoreMaxCpu(8),
+                Candidate::MoreMaxCpu(10),
+                Candidate::MoreMaxCpu(35),
+            ] {
+                let mut score = 0.0;
+
+                let mut result = c.run(&nodes, &service);
+                result.sort_by(|(_, x), (_, y)| y.total_cmp(x));
+
+                if let Some(node) = result
+                    .first()
+                    .and_then(|(winner, _)| nodes.iter().find(|n| n.name == *winner))
+                {
+                    if let Some(imbalance) = nodes
+                        .iter()
+                        .position(|n| n == node)
+                        .map(|i| imbalance(i, &nodes, &service))
+                    {
+                        score += imbalance;
+
+                        if (node.cpu + service.maxcpu) > node.maxcpu as f64 {
+                            score += 0.5;
+                        }
+                        if (node.mem + service.maxmem) > node.maxmem {
+                            score += 1.0;
+                        }
+
+                        scores.push((c.name(), score, node, imbalance));
+                    };
+                }
+
+            }
+
+            scores.sort_by(|(_, x,_,_), (_, y,_,_)| y.total_cmp(x));
+
+            if let Some((a, b,_,_)) = scores.first() {
+                    dbg!(&scores);
+                if scores
+                        .iter()
+                        .any(|(_, x,_,_)| x != b) {
+                    assert!(*a != Candidate::MinMax.name())
+                }
+            }
+        }
+
+
+        #[test]
+        fn comparison_multiple(nodes in nodes(), service in service()) {
+            print_table(
+                &[
+                    Candidate::Base,
+                    Candidate::MinMax,
+                    Candidate::MoreMaxCpu(2),
+                    Candidate::MoreMaxCpu(8),
+                    Candidate::MoreMaxCpu(10),
+                    Candidate::MoreMaxCpu(35),
+                ],
+                &nodes,
+                &service
+            );
+        }
+    }
+}
+
+#[cfg(feature = "lab")]
+#[allow(dead_code)]
+mod minmax_neq_35xcpu {
+    //! [`crate::skewed_stats::comparison`] suggests that "minmax" and "35xcpu" make similar
+    //! decisions on nodes with skewed stats.
+    //!
+    //! This proptest shrinks the search space to find cases where "minmax" and "35xcpu" do not
+    //! agree on a winner.
+    use proptest::prelude::*;
+    use proxmox_resource_scheduling::{
+        pve_static::{
+            evaluate::{
+                imbalance, score_nodes_to_start_service_with_variant, StaticResourceScheduling,
+            },
+            StaticNodeUsage, StaticServiceUsage,
+        },
+        topsis::evaluate::Topsis,
+    };
+
+    fn print_table(
+        input: &[(&str, &[(String, f64)])],
+        nodes: &[StaticNodeUsage],
+        service: &StaticServiceUsage,
+    ) {
+        let delim = "  ";
+
+        let (result_names, results) = {
+            let (mut lhs, mut rhs) = (vec![], vec![]);
+            input.iter().for_each(|(l, r)| {
+                lhs.push(l);
+                rhs.push(r)
+            });
+            (lhs, rhs)
+        };
+
+        let (service_cpu, service_mem) = (service.maxcpu, service.maxmem);
+
+        eprintln!();
+        eprint!(
+            "{:<15}{delim}{:>10}{delim}{:>10}{delim}{:>10}",
+            "name",
+            format!("cpu+{service_cpu:.2}"),
+            format!("mem+{service_mem}"),
+            "imbalance"
+        );
+        for x in &result_names {
+            eprint!("{delim}{:>10}", x.chars().take(10).collect::<String>());
+        }
+        eprintln!();
+
+        eprint!(
+            "{:-<15}{delim}{:->10}{delim}{:->10}{delim}{:->10}",
+            "", "", "", ""
+        );
+        for _ in result_names {
+            eprint!("{delim}{:->10}", "");
+        }
+        eprintln!();
+
+        let sorted_results = results
+            .iter()
+            .map(|result| {
+                let mut xs = result.to_vec();
+                xs.sort_by(|(_, a), (_, b)| b.total_cmp(a));
+                xs
+            })
+            .collect::<Vec<_>>();
+
+        let k = sorted_results.len();
+
+        nodes.iter().enumerate().for_each(|(i, n)| {
+            let pivot = &n.name;
+            let scores = sorted_results
+                .iter()
+                .filter_map(|result| result.iter().find(|(n, _)| n == pivot).map(|x| x.1))
+                .collect::<Vec<_>>();
+            let ranks = sorted_results
+                .iter()
+                .filter_map(|result| result.iter().position(|(n, _)| n == pivot).map(|x| x + 1))
+                .collect::<Vec<_>>();
+
+            if let Some(node) = nodes.iter().find(|n| n.name == *pivot) {
+                eprint!(
+                    "{:<15}{delim}{:>10.2}{delim}{:>10}{delim}{:>10.2}",
+                    node.name,
+                    node.cpu,
+                    node.mem,
+                    imbalance(i, nodes, service)
+                );
+                for i in 0..k {
+                    let s = format!(
+                        "\x1B[{}m{:.3}:{}\x1B[0m",
+                        (31 + i) % 37,
+                        scores[i],
+                        ranks[i]
+                    );
+                    eprint!("{delim}{s:>19}");
+                }
+                eprintln!();
+            }
+        });
+    }
+
+    const REPEAT_N_TIMES: u32 = 100;
+
+    const MIN_NODES: usize = 2;
+    const MAX_NODES: usize = 40;
+
+    const MAX_CPU: usize = 32;
+    const MAX_MEM: usize = 406;
+
+    fn service() -> impl Strategy<Value = StaticServiceUsage> {
+        (0.0..(MAX_CPU as f64), 0..MAX_MEM)
+            .prop_map(|(maxcpu, maxmem)| StaticServiceUsage { maxcpu, maxmem })
+    }
+
+    fn nodes() -> impl Strategy<Value = Vec<StaticNodeUsage>> {
+        prop::collection::vec((0.0..(MAX_CPU as f64), 0..MAX_MEM), MIN_NODES..MAX_NODES).prop_map(
+            move |resources| {
+                resources
+                    .into_iter()
+                    .enumerate()
+                    .map(|(i, (cpu, mem))| StaticNodeUsage {
+                        name: format!("node{i}"),
+                        cpu,
+                        maxcpu: MAX_CPU,
+                        mem,
+                        maxmem: MAX_MEM,
+                    })
+                    .collect()
+            },
+        )
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig {
+            failure_persistence: None,
+            cases: REPEAT_N_TIMES,
+            ..ProptestConfig::default()
+        })]
+
+        //#[test]
+        fn shrink_different_winners(nodes in nodes(), service in service()) {
+            let mut lhs = score_nodes_to_start_service_with_variant(
+                &nodes,
+                &service,
+                Some(&Topsis::MinMaxRelativeDistance),
+                Some(&StaticResourceScheduling::Base),
+            );
+
+            let mut rhs = score_nodes_to_start_service_with_variant(
+                &nodes,
+                &service,
+                None::<&Topsis>,
+                Some(&StaticResourceScheduling::RmsNTimesMoreMaxcpu(35)),
+            );
+
+            print_table(&[("minmax", &lhs), ("35xcpu", &rhs)], &nodes, &service);
+
+            lhs.sort_by(|(_, x), (_, y)| y.total_cmp(x));
+            rhs.sort_by(|(_, x), (_, y)| y.total_cmp(x));
+
+            if let Some((winner_lhs, winner_rhs)) = lhs
+                .first()
+                .and_then(|node| nodes.iter().find(|n| n.name == node.0))
+                .zip(
+                    rhs.first()
+                     .and_then(|node| nodes.iter().find(|n| n.name == node.0)),
+                )
+            {
+                assert_eq!(winner_lhs, winner_rhs);
+            }
+
+        }
+
+    }
+}
-- 
2.47.3