From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 0EC4393FF for ; Thu, 17 Nov 2022 15:06:17 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id E571D2DD2F for ; Thu, 17 Nov 2022 15:06:16 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 17 Nov 2022 15:06:15 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id CE96342F3A for ; Thu, 17 Nov 2022 15:00:24 +0100 (CET) From: Fiona Ebner To: pve-devel@lists.proxmox.com Date: Thu, 17 Nov 2022 15:00:07 +0100 Message-Id: <20221117140018.105004-7-f.ebner@proxmox.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20221117140018.105004-1-f.ebner@proxmox.com> References: <20221117140018.105004-1-f.ebner@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: =?UTF-8?Q?0=0A=09?=AWL 0.027 Adjusted score from AWL reputation of From: =?UTF-8?Q?address=0A=09?=BAYES_00 -1.9 Bayes spam probability is 0 to 1% KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict =?UTF-8?Q?Alignment=0A=09?=SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF =?UTF-8?Q?Record=0A=09?=SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH v2 ha-manager 06/15] usage: add Usage::Static plugin X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox 
VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 17 Nov 2022 14:06:17 -0000 for calculating node usage of services based upon static CPU and memory configuration as well as scoring the nodes with that information to decide where to start a new or recovered service. For getting the service stats, it's necessary to also consider the migration target (if present), because the configuration file might have already moved. It's necessary to update the cluster filesystem upon stealing the service to be able to always read the moved config right away when adding the usage. Signed-off-by: Fiona Ebner --- Changes from v1: * Pass haenv to resource's get_static_stats(), required by simulation env. debian/pve-ha-manager.install | 1 + src/PVE/HA/Env/PVE2.pm | 4 ++ src/PVE/HA/Usage.pm | 1 + src/PVE/HA/Usage/Makefile | 2 +- src/PVE/HA/Usage/Static.pm | 114 ++++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 src/PVE/HA/Usage/Static.pm diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install index 87fb24c..a7598a9 100644 --- a/debian/pve-ha-manager.install +++ b/debian/pve-ha-manager.install @@ -35,5 +35,6 @@ /usr/share/perl5/PVE/HA/Tools.pm /usr/share/perl5/PVE/HA/Usage.pm /usr/share/perl5/PVE/HA/Usage/Basic.pm +/usr/share/perl5/PVE/HA/Usage/Static.pm /usr/share/perl5/PVE/Service/pve_ha_crm.pm /usr/share/perl5/PVE/Service/pve_ha_lrm.pm diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm index 7cecf35..7fac43c 100644 --- a/src/PVE/HA/Env/PVE2.pm +++ b/src/PVE/HA/Env/PVE2.pm @@ -176,6 +176,10 @@ sub steal_service { } else { die "implement me"; } + + # Necessary for (at least) static usage plugin to always be able to read service config from new + # node right away. 
+ $self->cluster_state_update(); } sub read_group_config { diff --git a/src/PVE/HA/Usage.pm b/src/PVE/HA/Usage.pm index 4c723d1..66d9572 100644 --- a/src/PVE/HA/Usage.pm +++ b/src/PVE/HA/Usage.pm @@ -33,6 +33,7 @@ sub contains_node { die "implement in subclass"; } +# Logs a warning to $haenv upon failure, but does not die. sub add_service_usage_to_node { my ($self, $nodename, $sid, $service_node, $migration_target) = @_; diff --git a/src/PVE/HA/Usage/Makefile b/src/PVE/HA/Usage/Makefile index ccf1282..5a51359 100644 --- a/src/PVE/HA/Usage/Makefile +++ b/src/PVE/HA/Usage/Makefile @@ -1,4 +1,4 @@ -SOURCES=Basic.pm +SOURCES=Basic.pm Static.pm .PHONY: install install: diff --git a/src/PVE/HA/Usage/Static.pm b/src/PVE/HA/Usage/Static.pm new file mode 100644 index 0000000..ce705eb --- /dev/null +++ b/src/PVE/HA/Usage/Static.pm @@ -0,0 +1,114 @@ +package PVE::HA::Usage::Static; + +use strict; +use warnings; + +use PVE::HA::Resources; +use PVE::RS::ResourceScheduling::Static; + +use base qw(PVE::HA::Usage); + +sub new { + my ($class, $haenv) = @_; + + my $node_stats = eval { $haenv->get_static_node_stats() }; + die "did not get static node usage information - $@" if $@; + + my $scheduler = eval { PVE::RS::ResourceScheduling::Static->new(); }; + die "unable to initialize static scheduling - $@" if $@; + + return bless { + 'node-stats' => $node_stats, + 'service-stats' => {}, + haenv => $haenv, + scheduler => $scheduler, + }, $class; +} + +sub add_node { + my ($self, $nodename) = @_; + + my $stats = $self->{'node-stats'}->{$nodename} + or die "did not get static node usage information for '$nodename'\n"; + die "static node usage information for '$nodename' missing cpu count\n" if !$stats->{cpus}; + die "static node usage information for '$nodename' missing memory\n" if !$stats->{memory}; + + eval { $self->{scheduler}->add_node($nodename, int($stats->{cpus}), int($stats->{memory})); }; + die "initializing static node usage for '$nodename' failed - $@" if $@; +} + +sub 
remove_node { + my ($self, $nodename) = @_; + + $self->{scheduler}->remove_node($nodename); +} + +sub list_nodes { + my ($self) = @_; + + return $self->{scheduler}->list_nodes()->@*; +} + +sub contains_node { + my ($self, $nodename) = @_; + + return $self->{scheduler}->contains_node($nodename); +} + +my sub get_service_usage { + my ($self, $sid, $service_node, $migration_target) = @_; + + return $self->{'service-stats'}->{$sid} if $self->{'service-stats'}->{$sid}; + + my (undef, $type, $id) = $self->{haenv}->parse_sid($sid); + my $plugin = PVE::HA::Resources->lookup($type); + + my $stats = eval { $plugin->get_static_stats($self->{haenv}, $id, $service_node); }; + if (my $err = $@) { + # config might've already moved during a migration + $stats = eval { $plugin->get_static_stats($self->{haenv}, $id, $migration_target); } if $migration_target; + die "did not get static service usage information for '$sid' - $err\n" if !$stats; + } + + my $service_stats = { + maxcpu => $stats->{maxcpu} + 0.0, # containers allow non-integer cpulimit + maxmem => int($stats->{maxmem}), + }; + + $self->{'service-stats'}->{$sid} = $service_stats; + + return $service_stats; +} + +sub add_service_usage_to_node { + my ($self, $nodename, $sid, $service_node, $migration_target) = @_; + + eval { + my $service_usage = get_service_usage($self, $sid, $service_node, $migration_target); + $self->{scheduler}->add_service_usage_to_node($nodename, $service_usage); + }; + $self->{haenv}->log('warning', "unable to add service '$sid' usage to node '$nodename' - $@") + if $@; +} + +sub score_nodes_to_start_service { + my ($self, $sid, $service_node) = @_; + + my $score_list = eval { + my $service_usage = get_service_usage($self, $sid, $service_node); + $self->{scheduler}->score_nodes_to_start_service($service_usage); + }; + if (my $err = $@) { + $self->{haenv}->log( + 'err', + "unable to score nodes according to static usage for service '$sid' - $err", + ); + # TODO maybe use service count as fallback? 
+ return { map { $_ => 1 } $self->list_nodes() }; + } + + # Take minus the value, so that a lower score is better, which our caller(s) expect(s). + return { map { $_->[0] => -$_->[1] } $score_list->@* }; +} + +1; -- 2.30.2