From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id C87681FF16F for ; Tue, 30 Sep 2025 16:21:24 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 078009A62; Tue, 30 Sep 2025 16:21:04 +0200 (CEST) From: Daniel Kral To: pve-devel@lists.proxmox.com Date: Tue, 30 Sep 2025 16:19:19 +0200 Message-ID: <20250930142021.366529-13-d.kral@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20250930142021.366529-1-d.kral@proxmox.com> References: <20250930142021.366529-1-d.kral@proxmox.com> MIME-Version: 1.0 X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1759242004208 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.015 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH ha-manager 9/9] manager: make service node usage computation more granular X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" The $online_node_usage is built on every call to manage(...) now, but can be reduced to only be built on any scheduler mode change (including initialization or error path to be complete). This allows recompute_online_node_usage(...) 
to be reduced to adding/removing nodes whenever these become online or are not online anymore and handle the service usage updates whenever these change. Therefore, recompute_online_node_usage(...) must only be called once in manage(...) after $ns was properly updated. Note that this makes the ha-manager not acknowledge any hotplug changes to the guest configs anymore as long as the HA resource state doesn't change. Signed-off-by: Daniel Kral --- If we go for this patch, then we would need some mechanism to update the static usage for a single or all HA resources registered in $online_node_usage at once (or just rebuild $online_node_usage at that point). src/PVE/HA/Manager.pm | 90 +++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index 253deba9..6fadb3f3 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -106,6 +106,7 @@ sub update_crs_scheduler_mode { if (!defined($old_mode)) { $haenv->log('info', "using scheduler mode '$new_mode'") if $new_mode ne 'basic'; } elsif ($new_mode eq $old_mode) { + $haenv->update_static_service_stats() if $old_mode eq 'static'; return; # nothing to do } else { $haenv->log('info', "switching scheduler mode from '$old_mode' to '$new_mode'"); @@ -113,6 +114,39 @@ sub update_crs_scheduler_mode { $self->{crs}->{scheduler} = $new_mode; + my $online_node_usage; + + if ($new_mode eq 'static') { + $online_node_usage = eval { + my $scheduler = PVE::HA::Usage::Static->new($haenv); + $scheduler->add_node($_) for $self->{ns}->list_online_nodes()->@*; + $haenv->update_static_service_stats(); + return $scheduler; + }; + if ($@) { + $self->{crs}->{scheduler} = 'basic'; # retry on next update + $haenv->log( + 'warning', + "fallback to 'basic' scheduler mode, init for 'static' failed - $@", + ); + } + } elsif ($new_mode eq 'basic') { + # handled below in the general fall-back case + } else { + $haenv->log('warning', "got unknown 
scheduler mode '$new_mode', using 'basic'"); + } + + # fallback to the basic algorithm in any case + if (!$online_node_usage) { + $online_node_usage = PVE::HA::Usage::Basic->new($haenv); + $online_node_usage->add_node($_) for $self->{ns}->list_online_nodes()->@*; + } + + $self->{online_node_usage} = $online_node_usage; + + # initialize with current nodes and services states + $self->add_service_usage($_, $self->{ss}->{$_}) for keys $self->{ss}->%*; + return; } @@ -253,49 +287,19 @@ my $valid_service_states = { sub recompute_online_node_usage { my ($self) = @_; - my $haenv = $self->{haenv}; + my ($haenv, $ns) = $self->@{qw(haenv ns)}; - my $online_nodes = { map { $_ => 1 } $self->{ns}->list_online_nodes()->@* }; + for my $node ($self->{online_node_usage}->list_nodes()) { + next if $ns->node_is_online($node); - my $online_node_usage; - - if (my $mode = $self->{crs}->{scheduler}) { - if ($mode eq 'static') { - $online_node_usage = eval { - my $scheduler = PVE::HA::Usage::Static->new($haenv); - $scheduler->add_node($_) for keys $online_nodes->%*; - $haenv->update_static_service_stats(); - return $scheduler; - }; - $haenv->log( - 'warning', - "fallback to 'basic' scheduler mode, init for 'static' failed - $@", - ) if $@; - } elsif ($mode eq 'basic') { - # handled below in the general fall-back case - } else { - $haenv->log('warning', "got unknown scheduler mode '$mode', using 'basic'"); - } + $self->{online_node_usage}->remove_node($node); } - # fallback to the basic algorithm in any case - if (!$online_node_usage) { - $online_node_usage = PVE::HA::Usage::Basic->new($haenv); - $online_node_usage->add_node($_) for keys $online_nodes->%*; + for my $node ($ns->list_online_nodes()->@*) { + next if $self->{online_node_usage}->contains_node($node); + + $self->{online_node_usage}->add_node($node); } - - for my $sid (sort keys $self->{ss}->%*) { - my $sd = $self->{ss}->{$sid}; - my $used_nodes = PVE::HA::Tools::get_used_service_nodes($sd, $online_nodes); - my ($current, 
$target) = $used_nodes->@{qw(current target)}; - - $online_node_usage->add_service_usage_to_node($current, $sid, $sd->{node}, $sd->{target}) - if $current; - $online_node_usage->add_service_usage_to_node($target, $sid, $sd->{node}, $sd->{target}) - if $target; - } - - $self->{online_node_usage} = $online_node_usage; } my $change_service_state = sub { @@ -693,6 +697,8 @@ sub manage { $self->{groups} = $haenv->read_group_config(); # update + $self->recompute_online_node_usage(); + # compute new service status # add new service @@ -704,11 +710,13 @@ sub manage { $haenv->log('info', "adding new service '$sid' on node '$cd->{node}'"); # assume we are running to avoid relocate running service at add my $state = ($cd->{state} eq 'started') ? 'request_start' : 'request_stop'; - $ss->{$sid} = { + my $sd = $ss->{$sid} = { state => $state, node => $cd->{node}, uid => compute_new_uuid('started'), }; + + $self->add_service_usage($sid, $sd); } # remove stale or ignored services from manager state @@ -718,12 +726,12 @@ sub manage { my $reason = defined($sc->{$sid}) ? 'ignored state requested' : 'no config'; $haenv->log('info', "removing stale service '$sid' ($reason)"); + $self->{online_node_usage}->remove_service_usage($sid); + # remove all service related state information delete $ss->{$sid}; } - $self->recompute_online_node_usage(); - my $new_rules = $haenv->read_rules_config(); # TODO PVE 10: Remove group migration when HA groups have been fully migrated to rules -- 2.47.3 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel