From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 82E211FF144 for ; Tue, 24 Mar 2026 19:36:05 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 842AA1CE4F; Tue, 24 Mar 2026 19:36:19 +0100 (CET) From: Daniel Kral To: pve-devel@lists.proxmox.com Subject: [PATCH ha-manager v2 31/40] usage: add dynamic usage scheduler Date: Tue, 24 Mar 2026 19:30:15 +0100 Message-ID: <20260324183029.1274972-32-d.kral@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260324183029.1274972-1-d.kral@proxmox.com> References: <20260324183029.1274972-1-d.kral@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1774376989632 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.058 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: ANEV7FB6EQUZIVGBWVVUZ476ZDM6AISB X-Message-ID-Hash: ANEV7FB6EQUZIVGBWVVUZ476ZDM6AISB X-MailFrom: d.kral@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: The dynamic usage scheduler allows the HA Manager to make scheduling decisions based on the current usage of the nodes and cluster resources in addition to the maximum usage 
stats as reported by the PVE::HA::Env implementation. Signed-off-by: Daniel Kral --- changes v1 -> v2: - guard PVE::HA::Usage::Dynamic with my $have_dynamic_scheduling as PVE::RS::ResourceScheduling::Dynamic might not be available (as suggested by @Thomas) - add add_service() impl debian/pve-ha-manager.install | 1 + src/PVE/HA/Env.pm | 12 ++++ src/PVE/HA/Manager.pm | 21 +++++++ src/PVE/HA/Usage/Dynamic.pm | 110 ++++++++++++++++++++++++++++++++++ src/PVE/HA/Usage/Makefile | 2 +- 5 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 src/PVE/HA/Usage/Dynamic.pm diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install index 38d5d60b..75220a0b 100644 --- a/debian/pve-ha-manager.install +++ b/debian/pve-ha-manager.install @@ -42,6 +42,7 @@ /usr/share/perl5/PVE/HA/Usage.pm /usr/share/perl5/PVE/HA/Usage/Basic.pm /usr/share/perl5/PVE/HA/Usage/Static.pm +/usr/share/perl5/PVE/HA/Usage/Dynamic.pm /usr/share/perl5/PVE/Service/pve_ha_crm.pm /usr/share/perl5/PVE/Service/pve_ha_lrm.pm /usr/share/pve-manager/templates/default/fencing-body.html.hbs diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm index 3643292e..44c26854 100644 --- a/src/PVE/HA/Env.pm +++ b/src/PVE/HA/Env.pm @@ -312,12 +312,24 @@ sub get_static_service_stats { return $self->{plug}->get_static_service_stats(); } +sub get_dynamic_service_stats { + my ($self) = @_; + + return $self->{plug}->get_dynamic_service_stats(); +} + sub get_static_node_stats { my ($self) = @_; return $self->{plug}->get_static_node_stats(); } +sub get_dynamic_node_stats { + my ($self) = @_; + + return $self->{plug}->get_dynamic_node_stats(); +} + sub get_node_version { my ($self, $node) = @_; diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index 152e18e5..6f7b431b 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -21,6 +21,12 @@ eval { $have_static_scheduling = 1; }; +my $have_dynamic_scheduling; +eval { + require PVE::HA::Usage::Dynamic; + $have_dynamic_scheduling = 1; +}; + ## 
Variable Name & Abbreviations Convention # # The HA stack has some variables it uses frequently and thus abbreviates it such that it may be @@ -264,6 +270,21 @@ sub recompute_online_node_usage { 'warning', "fallback to 'basic' scheduler mode, init for 'static' failed - $@", ) if $@; + } elsif ($mode eq 'dynamic') { + if ($have_dynamic_scheduling) { + $online_node_usage = eval { + $service_stats = $haenv->get_dynamic_service_stats(); + my $scheduler = PVE::HA::Usage::Dynamic->new($haenv, $service_stats); + $scheduler->add_node($_) for $online_nodes->@*; + return $scheduler; + }; + } else { + $@ = "dynamic scheduling not available\n"; + } + $haenv->log( + 'warning', + "fallback to 'basic' scheduler mode, init for 'dynamic' failed - $@", + ) if $@; } elsif ($mode eq 'basic') { # handled below in the general fall-back case } else { diff --git a/src/PVE/HA/Usage/Dynamic.pm b/src/PVE/HA/Usage/Dynamic.pm new file mode 100644 index 00000000..7e11715d --- /dev/null +++ b/src/PVE/HA/Usage/Dynamic.pm @@ -0,0 +1,110 @@ +package PVE::HA::Usage::Dynamic; + +use strict; +use warnings; + +use PVE::HA::Resources; +use PVE::RS::ResourceScheduling::Dynamic; + +use base qw(PVE::HA::Usage); + +sub new { + my ($class, $haenv, $service_stats) = @_; + + my $node_stats = eval { $haenv->get_dynamic_node_stats() }; + die "did not get dynamic node usage information - $@" if $@; + + my $scheduler = eval { PVE::RS::ResourceScheduling::Dynamic->new() }; + die "unable to initialize dynamic scheduling - $@" if $@; + + return bless { + 'node-stats' => $node_stats, + 'service-stats' => $service_stats, + haenv => $haenv, + scheduler => $scheduler, + }, $class; +} + +sub add_node { + my ($self, $nodename) = @_; + + my $stats = $self->{'node-stats'}->{$nodename} + or die "did not get dynamic node usage information for '$nodename'\n"; + die "dynamic node usage information for '$nodename' missing cpu count\n" if !$stats->{maxcpu}; + die "dynamic node usage information for '$nodename' missing memory\n" if 
!$stats->{maxmem}; + + eval { $self->{scheduler}->add_node($nodename, $stats); }; + die "initializing dynamic node usage for '$nodename' failed - $@" if $@; +} + +sub remove_node { + my ($self, $nodename) = @_; + + $self->{scheduler}->remove_node($nodename); +} + +sub list_nodes { + my ($self) = @_; + + return $self->{scheduler}->list_nodes()->@*; +} + +sub contains_node { + my ($self, $nodename) = @_; + + return $self->{scheduler}->contains_node($nodename); +} + +my sub get_service_usage { + my ($self, $sid) = @_; + + my $service_stats = $self->{'service-stats'}->{$sid}->{usage} + or die "did not get dynamic service usage information for '$sid'\n"; + + return $service_stats; +} + +sub add_service { + my ($self, $sid, $current_node, $target_node, $running) = @_; + + # do not add services which do not put any usage on the nodes + return if !defined($current_node) && !defined($target_node); + + eval { + my $service_usage = get_service_usage($self, $sid); + + my $service = { + stats => $service_usage, + running => $running, + current_node => $current_node, + target_node => $target_node, + }; + + $self->{scheduler}->add_resource($sid, $service); + }; + $self->{haenv}->log('warning', "unable to add service '$sid' - $@") if $@; +} + +sub remove_service_usage { + my ($self, $sid) = @_; + + eval { $self->{scheduler}->remove_resource($sid) }; + $self->{haenv}->log('warning', "unable to remove service '$sid' usage - $@") if $@; +} + +sub score_nodes_to_start_service { + my ($self, $sid) = @_; + + my $score_list = eval { + my $service_usage = get_service_usage($self, $sid); + $self->{scheduler}->score_nodes_to_start_resource($service_usage); + }; + $self->{haenv} + ->log('err', "unable to score nodes according to dynamic usage for service '$sid' - $@") + if $@; + + # Take minus the value, so that a lower score is better, which our caller(s) expect(s). 
+ return { map { $_->[0] => -$_->[1] } $score_list->@* }; +} + +1; diff --git a/src/PVE/HA/Usage/Makefile b/src/PVE/HA/Usage/Makefile index befdda60..5d51a9c1 100644 --- a/src/PVE/HA/Usage/Makefile +++ b/src/PVE/HA/Usage/Makefile @@ -1,5 +1,5 @@ SIM_SOURCES=Basic.pm -SOURCES=${SIM_SOURCES} Static.pm +SOURCES=${SIM_SOURCES} Static.pm Dynamic.pm .PHONY: install install: -- 2.47.3