From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id AEEF81FF17E for ; Thu, 16 Oct 2025 17:34:38 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E9D1616BE3; Thu, 16 Oct 2025 17:34:56 +0200 (CEST) Mime-Version: 1.0 Date: Thu, 16 Oct 2025 17:34:52 +0200 Message-Id: From: "Daniel Kral" To: "Fiona Ebner" , "Proxmox VE development discussion" X-Mailer: aerc 0.20.0 References: <20250930142021.366529-1-d.kral@proxmox.com> <20250930142021.366529-4-d.kral@proxmox.com> <88318e2a-e64f-4537-9a6e-09003bd4af53@proxmox.com> In-Reply-To: <88318e2a-e64f-4537-9a6e-09003bd4af53@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1760628889170 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.014 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH perl-rs 1/1] pve-rs: resource_scheduling: allow granular usage changes X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" On Thu Oct 16, 2025 at 12:32 PM CEST, Fiona Ebner wrote: > Am 30.09.25 um 4:21 PM schrieb Daniel Kral: >> Implements a simple bidirectional map to track which service usages have >> been added to nodes, so that these can be removed later individually. >> >> The StaticServiceUsage is added to the HashMap<> in StaticNodeInfo to >> reduce unnecessary indirection when summing these values in >> score_nodes_to_start_service(...). >> >> Signed-off-by: Daniel Kral > > Reviewed-by: Fiona Ebner > >> --- >> I started out adding and removing the service_usage in StaticNodeUsage >> on each add_service_usage_to_node(...) and remove_service_usage(...) >> call individually, but I think summing those up every time is better for >> numerical stability.. > > This can go into the commit message too if worded a bit less informally ;) Will do! Even though I asked myself if the concern about numerical stability really holds true here if we don't go for ha #9 as then these values will only change over a single manage(...) run and I'd guess that most users would use floating-point values which can be represented exactly (0.0, 1.0, 2.0, 3.0, ... and 0.5, 0.25, 0.125, ...). For these, numerical stability shouldn't be a problem at all, or not? > >> >> .../bindings/resource_scheduling_static.rs | 98 ++++++++++++++++--- >> pve-rs/test/resource_scheduling.pl | 84 ++++++++++++++-- >> 2 files changed, 160 insertions(+), 22 deletions(-) >> >> diff --git a/pve-rs/src/bindings/resource_scheduling_static.rs b/pve-rs/src/bindings/resource_scheduling_static.rs >> index 7cf2f35..535dfe3 100644 >> --- a/pve-rs/src/bindings/resource_scheduling_static.rs >> +++ b/pve-rs/src/bindings/resource_scheduling_static.rs >> @@ -16,8 +16,16 @@ pub mod pve_rs_resource_scheduling_static { >> >> perlmod::declare_magic!(Box : &Scheduler as "PVE::RS::ResourceScheduling::Static"); >> >> + struct StaticNodeInfo { >> + name: String, >> + maxcpu: usize, >> + maxmem: usize, >> + services: HashMap, >> + } >> + >> struct Usage { >> - nodes: HashMap, >> + nodes: HashMap, >> + service_nodes: HashMap>, > > Rather than Vec it could be a HashSet, but I'm fine with either way. > >> } >> >> /// A scheduler instance contains the resource usage by node. > > ---snip--- > >> @@ -67,10 +75,25 @@ pub mod pve_rs_resource_scheduling_static { >> >> /// Method: Remove a node from the scheduler. >> #[export] >> - pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) { >> + pub fn remove_node(#[try_from_ref] this: &Scheduler, nodename: &str) -> Result<(), Error> { >> let mut usage = this.inner.lock().unwrap(); >> >> - usage.nodes.remove(nodename); >> + if let Some(node) = usage.nodes.remove(nodename) { >> + for (sid, _) in node.services.iter() { >> + match usage.service_nodes.get_mut(sid) { >> + Some(nodes) => { >> + nodes.retain(|node| !node.eq(nodename)); >> + } >> + None => bail!( >> + "service '{}' not present while removing node '{}'", > > Nit: for a user (or developer in 6 months time :P), this error message > is rather unspecific. I'd suggest: > "internal error: ... not present in service_nodes hash map while ..." Will do as those are always helpful but hopefully this won't reach any users as it's a consistency check of the implementation itself ;) > >> + sid, >> + nodename >> + ), >> + } >> + } >> + } >> + >> + Ok(()) >> } >> >> /// Method: Get a list of all the nodes in the scheduler. >> @@ -93,22 +116,55 @@ pub mod pve_rs_resource_scheduling_static { >> usage.nodes.contains_key(nodename) >> } >> >> - /// Method: Add usage of `service` to the node's usage. >> + /// Method: Add service `sid` and its `service_usage` to the node. >> #[export] >> pub fn add_service_usage_to_node( >> #[try_from_ref] this: &Scheduler, >> nodename: &str, >> - service: StaticServiceUsage, >> + sid: &str, >> + service_usage: StaticServiceUsage, > > So this requires a versioned Breaks in d/control. Please mention this in > the commit notes and in the cover letter. ACK will add that information here and will keep that in mind for future patch series! > >> ) -> Result<(), Error> { >> let mut usage = this.inner.lock().unwrap(); >> >> match usage.nodes.get_mut(nodename) { >> Some(node) => { >> - node.add_service_usage(&service); >> - Ok(()) >> + if node.services.contains_key(sid) { >> + bail!("service '{}' already added to node '{}'", sid, nodename); >> + } >> + >> + node.services.insert(sid.to_string(), service_usage); >> } >> None => bail!("node '{}' not present in usage hashmap", nodename), >> } >> + >> + if let Some(nodes) = usage.service_nodes.get_mut(sid) { >> + nodes.push(nodename.to_string()); >> + } else { >> + usage >> + .service_nodes >> + .insert(sid.to_string(), vec![nodename.to_string()]); >> + } > > Nit: just to be sure, we could check whether the node would be duplicate > in service_nodes too (is more straightforward with using HashSet). Maybe > not really worth it though, because if we keep the mappings node -> > services and service -> nodes consistent with each other (which we of > course should) then the check above already protects us. I'll check that again, but I'd go for the HashSet idea then as it fits better conceptually. Also because I was curious: ``` println!("HashMap>: {}", size_of::>>()); println!("HashMap>: {}", size_of::>>()); --- HashMap>: 48 HashMap>: 48 ``` although ``` println!("Vec: {}", size_of::>()); println!("HashSet: {}", size_of::>()); --- Vec: 24 HashSet: 48 ``` > > ---snip--- > >> @@ -50,7 +65,54 @@ sub test_balance { >> is($nodes[1], "A", 'second should be A'); >> } >> >> - $static->add_service_usage_to_node($nodes[0], $service); >> + $static->add_service_usage_to_node($nodes[0], $sid_names[$i], $service); > > Nit: could also just use something like "vm:$i" or "vm:" . 100 + $i > rather than letters. If it's a variable-controlled enumeration that > seems more natural to me. Right, I wanted to stick to the alphabet usage here but that makes much more sense for something that is generated ^^. > >> + } >> +} >> + > > ---snip--- > >> @@ -96,9 +158,9 @@ sub test_balance_small_memory_difference { >> $static->add_node("C", 4, 8_000_000_000); >> >> if ($with_start_load) { >> - $static->add_service_usage_to_node("A", { maxcpu => 4, maxmem => 1_000_000_000 }); >> - $static->add_service_usage_to_node("B", { maxcpu => 2, maxmem => 1_000_000_000 }); >> - $static->add_service_usage_to_node("C", { maxcpu => 2, maxmem => 1_000_000_000 }); >> + $static->add_service_usage_to_node("A", "a", { maxcpu => 4, maxmem => 1_000_000_000 }); >> + $static->add_service_usage_to_node("B", "b", { maxcpu => 2, maxmem => 1_000_000_000 }); >> + $static->add_service_usage_to_node("C", "c", { maxcpu => 2, maxmem => 1_000_000_000 }); >> } >> >> my $service = { >> @@ -106,6 +168,7 @@ sub test_balance_small_memory_difference { >> maxmem => 16_000_000, >> }; >> >> + my @sid_names = ("d" .. "z"); >> for (my $i = 0; $i < 20; $i++) { >> my $score_list = $static->score_nodes_to_start_service($service); >> >> @@ -131,12 +194,13 @@ sub test_balance_small_memory_difference { >> die "internal error, got $i % 4 == " . ($i % 4) . "\n"; >> } >> >> - $static->add_service_usage_to_node($nodes[0], $service); >> + $static->add_service_usage_to_node($nodes[0], $sid_names[$i], $service); > > Nit: same as above regarding service names, we're already very close to > the edge of the alphabet in this case ;P > >> } >> } >> >> test_basic(); >> test_balance(); >> +test_balance_removal(); >> test_overcommitted(); >> test_balance_small_memory_difference(1); >> test_balance_small_memory_difference(0); _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel