From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id E2FE9A83F for ; Wed, 27 Apr 2022 17:34:29 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id D08DE2881F for ; Wed, 27 Apr 2022 17:33:59 +0200 (CEST) Received: from bastionodiso.odiso.net (bastionodiso.odiso.net [IPv6:2a0a:1580:2000::2d]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS id 1BF7B287D2 for ; Wed, 27 Apr 2022 17:33:53 +0200 (CEST) Received: from kvmformation3.odiso.net (formationkvm3.odiso.net [10.3.94.12]) by bastionodiso.odiso.net (Postfix) with ESMTP id BCAF8159B8; Wed, 27 Apr 2022 17:33:52 +0200 (CEST) Received: by kvmformation3.odiso.net (Postfix, from userid 0) id B0462F94BC; Wed, 27 Apr 2022 17:33:52 +0200 (CEST) From: Alexandre Derumier To: pve-devel@lists.proxmox.com Date: Wed, 27 Apr 2022 17:33:45 +0200 Message-Id: <20220427153351.1773666-3-aderumier@odiso.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220427153351.1773666-1-aderumier@odiso.com> References: <20220427153351.1773666-1-aderumier@odiso.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.003 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% HEADER_FROM_DIFFERENT_DOMAINS 0.248 From and EnvelopeFrom 2nd level mail domains are different KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods NO_DNS_FOR_FROM 0.001 Envelope sender 
has no MX or A DNS records SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record Subject: [pve-devel] [PATCH pve-ha-manager 2/8] get services && nodes stats X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 27 Apr 2022 15:34:29 -0000 For offline VMs in recovery state, we look at the RRD average over the last 20 minutes (excluding spikes with the 90th percentile). For online VMs, we get the last streamed RRD value. We still need to implement a method to compute the last-minute average for CPU usage without re-reading the RRD file. For other metrics, we can use the last value. For nodes, we get the last streamed RRD value (the last-minute average for CPU usage also still needs to be computed). A TOPSIS score is computed for VMs in recovery state, ordered by: - biggest boot memory usage (windows = 100% memory, linux: last mem used) - biggest cpu usage We want to restart the biggest services first, to have a better chance of finding a node with enough resources --- debian/pve-ha-manager.install | 1 + src/PVE/HA/Balancer/Makefile | 2 +- src/PVE/HA/Balancer/Stats.pm | 134 ++++++++++++++++++++++++++++++ src/PVE/HA/Env.pm | 28 +++++++ src/PVE/HA/Env/PVE2.pm | 149 ++++++++++++++++++++++++++++++++++ src/PVE/HA/Manager.pm | 5 ++ src/PVE/HA/Sim/TestEnv.pm | 48 ++++++++++- 7 files changed, 365 insertions(+), 2 deletions(-) create mode 100644 src/PVE/HA/Balancer/Stats.pm diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install index d6979c4..6297997 100644 --- a/debian/pve-ha-manager.install +++ b/debian/pve-ha-manager.install @@ -21,6 +21,7 @@ /usr/share/perl5/PVE/HA/CRM.pm /usr/share/perl5/PVE/HA/Balancer/AHP.pm /usr/share/perl5/PVE/HA/Balancer/Topsis.pm +/usr/share/perl5/PVE/HA/Balancer/Stats.pm /usr/share/perl5/PVE/HA/Config.pm /usr/share/perl5/PVE/HA/Config.pm
# --- Definitions the patch adds to src/PVE/HA/Balancer/Stats.pm ---

# Fold one service's stats into the aggregates of the node it is placed on.
#
# $self     - manager object; $self->{online_node_stats} is read and updated
# $node     - node name the service is configured on (may be undef)
# $vm_stats - stats hash of the service (maxcpu and cpu_pressure are read)
#
# Does nothing when the node is unknown or not in the online node stats.
my $compute_node_vms_pressure = sub {
    my ($self, $node, $vm_stats) = @_;

    # A service without a config entry has no node; an undef hash key would
    # only produce a warning and never match.
    return if !defined($node);
    return if !defined($self->{online_node_stats}->{$node});

    my $node_stats = $self->{online_node_stats}->{$node}->{stats};

    # count total number of vms vcpus on this host
    $node_stats->{total_vm_vcpus} = 0 if !defined($node_stats->{total_vm_vcpus});
    $node_stats->{total_vm_vcpus} += $vm_stats->{maxcpu} || 0;

    # remember the biggest vm pressure seen on this host
    my $pressure = $vm_stats->{cpu_pressure} || 0;
    $node_stats->{max_vm_pressure} = 0 if !defined($node_stats->{max_vm_pressure});
    $node_stats->{max_vm_pressure} = $pressure if $pressure > $node_stats->{max_vm_pressure};
};

# Attach type, vmid, vmconf and stats to every entry of $ss, and compute a
# recovery score for the services that are in 'recovery' state.
#
# $self - manager object (haenv, balancer weights/order, online_node_stats)
# $ss   - service status hash, keyed by sid; modified in place
# $sc   - service config hash, keyed by sid; only {node} is read
#
# Only 'recovery' and 'started' services get real stats; all other states
# keep an all-zero stats record.
my $get_service_stats = sub {
    my ($self, $ss, $sc) = @_;

    my $haenv = $self->{haenv};
    my @stat_keys = qw(cpu maxcpu startmem mem maxmem totalcpu cpu_pressure recovery_score);

    my $recovery_stats = {};

    foreach my $sid (sort keys %$ss) {
        my $sd = $ss->{$sid};
        my $cd = $sc->{$sid};
        my $node = defined($cd) ? $cd->{node} : undef;

        my (undef, $type, $vmid) = $haenv->parse_sid($sid);
        $sd->{type} = $type;
        $sd->{vmid} = $vmid;

        $sd->{stats} = { map { $_ => 0 } @stat_keys };

        my $stats;
        if ($sd->{state} eq 'recovery') {
            # get vm/ct stats history on last 20min (95 percentile)
            $stats = $haenv->get_vm_offline_rrd_stats($vmid, 95);
        } elsif ($sd->{state} eq 'started') {
            # get last stats from cache.
            $stats = $haenv->get_vm_rrd_stats($vmid);
        } else {
            # avoid to compute all stats, as currently we only support
            # recovery and started for balancing
            next;
        }

        # The environment may hand back a record that lacks some counters;
        # fill the gaps so the arithmetic below never sees undef.
        $stats = {} if !defined($stats);
        foreach my $key (@stat_keys) {
            $stats->{$key} = 0 if !defined($stats->{$key});
        }

        my $vmconf = $haenv->read_vm_ct_config($vmid, $type);
        $sd->{vmconf} = $vmconf;

        $stats->{startmem} = $stats->{mem};
        # windows vm fill memory with zero at boot, so mem = maxmem
        $stats->{startmem} = $stats->{maxmem}
            if $vmconf && defined($vmconf->{ostype}) && $vmconf->{ostype} eq 'windows';

        # totalcpu = relative cpu for 1 core. 50% of 4 cores = 200% of 1 core
        $stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu};

        $stats->{recovery_score} = 0;

        # Pass the node NAME; passing the config hash itself can never match
        # a key of online_node_stats.
        $compute_node_vms_pressure->($self, $node, $stats);

        $sd->{stats} = $stats;
        $recovery_stats->{$sid} = $stats if $sd->{state} eq 'recovery';
    }

    # compute scores for recovery services
    return if !keys %$recovery_stats;

    my $weights = $self->{balancer}->{topsis}->{services_recovery}->{weights};
    my $order = $self->{balancer}->{topsis}->{services_recovery}->{order};
    my $scores = PVE::HA::Balancer::Topsis::score($recovery_stats, $weights, $order);

    foreach my $sid (sort keys %$scores) {
        $ss->{$sid}->{stats}->{recovery_score} = $scores->{$sid}->{score};
    }
};

# Rebuild $self->{online_node_stats} from the environment's node stats and
# then refresh the per-service stats in $ss.
#
# $self - manager object; uses $self->{ns} and $self->{haenv}
# $ss   - service status hash (modified in place)
# $sc   - service config hash
#
# Returns the new online node stats hash (also stored on $self).
sub recompute_node_service_stats {
    my ($self, $ss, $sc) = @_;

    my $online_node_stats = {};
    my $online_nodes = $self->{ns}->list_online_nodes();

    foreach my $node (@$online_nodes) {
        my $stats = $self->{haenv}->get_node_rrd_stats($node);
        $stats = {} if !defined($stats);
        $stats->{cpu} = 0 if !defined($stats->{cpu});
        $stats->{cpu_pressure} = 0 if !defined($stats->{cpu_pressure}); #fixme: implement rrd
        $stats->{maxcpu} = 0 if !defined($stats->{maxcpu});
        $stats->{mem} = 0 if !defined($stats->{mem});
        $stats->{ksm} = 0 if !defined($stats->{ksm}); #fixme: implement rrd
        $stats->{maxmem} = 0 if !defined($stats->{maxmem});
        $stats->{totalcpu} = $stats->{cpu} * 100 * $stats->{maxcpu}; #how to handle different cpu model power ? bogomips ?
        $stats->{total_vm_vcpus} = 0;
        $stats->{max_vm_pressure} = 0;
        $online_node_stats->{$node}->{stats} = $stats;
    }

    # Publish the fresh node table BEFORE the service pass: the per-node
    # aggregation reads $self->{online_node_stats}, and would otherwise
    # update the table of the previous round (or none at all).
    $self->{online_node_stats} = $online_node_stats;

    $get_service_stats->($self, $ss, $sc);

    return $online_node_stats;
}

# Compute the AHP weights used to score services in recovery state and store
# them, together with the per-criterion order, under
# $self->{balancer}->{topsis}->{services_recovery}.
sub compute_ahp_recovery_weights {
    my ($self) = @_;

    # bigger memory/cpu for offline service, better chance to find free space first

    my $bestorder = {
        startmem => "+",
        totalcpu => "+",
    };

    # NOTE(review): the totalcpu row only names startmem; how missing pairs
    # are completed depends on PVE::HA::Balancer::AHP::compute_weights,
    # which is not visible here - confirm this is the intended matrix.
    my $preferences = {
        startmem => {
            startmem => 1,
            totalcpu => 2,
        },
        totalcpu => {
            startmem => 1,
        },
    };

    my $weights = PVE::HA::Balancer::AHP::compute_weights($preferences);
    $self->{balancer}->{topsis}->{services_recovery}->{weights} = $weights;
    $self->{balancer}->{topsis}->{services_recovery}->{order} = $bestorder;
}

# --- Methods the patch adds to src/PVE/HA/Env.pm ---
# Each one forwards to the environment plugin stored in $self->{plug}.

# Node stats for $node, as provided by the plugin.
sub get_node_rrd_stats {
    my ($self, $node) = @_;

    return $self->{plug}->get_node_rrd_stats($node);
}

# Current stats of guest $vmid, as provided by the plugin.
sub get_vm_rrd_stats {
    my ($self, $vmid, $percentile) = @_;

    return $self->{plug}->get_vm_rrd_stats($vmid, $percentile);
}

# Historic stats of guest $vmid at the given percentile, as provided by the
# plugin.
sub get_vm_offline_rrd_stats {
    my ($self, $vmid, $percentile) = @_;

    return $self->{plug}->get_vm_offline_rrd_stats($vmid, $percentile);
}

# Read the guest config through the plugin: 'vm' uses read_vm_config, 'ct'
# uses read_ct_config. Any other type yields nothing.
sub read_vm_ct_config {
    my ($self, $vmid, $type) = @_;

    if ($type eq 'vm') {
        return $self->{plug}->read_vm_config($vmid);
    } elsif ($type eq 'ct') {
        return $self->{plug}->read_ct_config($vmid);
    }

    return;
}

1;
# --- Methods the patch adds to src/PVE/HA/Env/PVE2.pm ---

# Return what PVE::API2Tools::extract_node_stats() builds for $node from the
# current cluster rrd dump and member list.
sub get_node_rrd_stats {
    my ($self, $node) = @_;

    my $rrd = PVE::Cluster::rrd_dump();
    my $members = PVE::Cluster::get_members();

    my $stats = PVE::API2Tools::extract_node_stats($node, $members, $rrd);

    # improve me
    # we could compute last average minute for cpu usage,
    # for others values, use can use last value

    return $stats;
}

# Return what PVE::API2Tools::extract_vm_stats() builds for guest $vmid from
# the current cluster rrd dump and the guest's vmlist entry.
sub get_vm_rrd_stats {
    my ($self, $vmid) = @_;

    my $rrd = PVE::Cluster::rrd_dump();
    my $vmlist = PVE::Cluster::get_vmlist() || {};
    my $idlist = $vmlist->{ids} || {};

    my $data = $idlist->{$vmid};
    my $stats = PVE::API2Tools::extract_vm_stats($vmid, $data, $rrd);

    # improve me
    # we could compute last average minute for cpu usage,
    # for others values, use can use last value

    return $stats;
}

# Fetch the AVERAGE rows of the guest's rrd file for the window from 20
# minutes ago to 1 minute ago (60s resolution) and reduce each counter to
# the requested percentile.
#
# $vmid       - guest id; the file read is /var/lib/rrdcached/db/pve2-vm/$vmid
# $percentile - percentile (0..100) passed on to percentile()
#
# Returns a hash with cpu, mem, maxmem, maxcpu, totalcpu and cpu_pressure.
# Every value is 0 when the fetch fails or yields no usable samples.
sub get_vm_offline_rrd_stats {
    my ($self, $vmid, $percentile) = @_;

    my $rrdname = "pve2-vm/$vmid";
    my $rrddir = "/var/lib/rrdcached/db";

    my $rrd = "$rrddir/$rrdname";

    my $cf = "AVERAGE";

    my $reso = 60;
    # align "now" on a resolution boundary
    my $ctime = $reso * int(time() / $reso);

    #last 20minutes average?

    my $req_start = $ctime - $reso * 20;
    my $req_end = $ctime - $reso * 1;

    my @args = (
        "-s" => $req_start,
        "-e" => $req_end,
        "-r" => $reso,
    );

    # go through rrdcached when its socket is present
    my $socket = "/var/run/rrdcached.sock";
    push @args, "--daemon" => "unix:$socket" if -S $socket;

    my ($start, $step, $names, $data) = RRDs::fetch($rrd, $cf, @args);

    # RRDs reports failures (e.g. a missing file) through RRDs::error and
    # returns nothing; treat that as "no samples" instead of dying on the
    # undefined array reference below.
    $data = [] if RRDs::error() || !defined($data);

    my @cpu = ();
    my @mem = ();
    my @maxmem = ();
    my @maxcpu = ();

    #fixme: implement true cgroup host cpu/mem && pressure in rrd

    # assumes the first four columns are maxcpu, cpu, maxmem, mem in that
    # order - TODO confirm against $names
    foreach my $rec (@$data) {
        my $maxcpu = $rec->[0] || 0;
        my $cpu = $rec->[1] || 0;
        my $maxmem = $rec->[2] || 0;
        my $mem = $rec->[3] || 0;
        #skip zeros values if vm is down
        push @cpu, $cpu * $maxcpu if $cpu > 0;
        push @mem, $mem if $mem > 0;
        push @maxcpu, $maxcpu if $maxcpu > 0;
        push @maxmem, $maxmem if $maxmem > 0;
    }

    my $stats = {};

    $stats->{cpu} = percentile($percentile, \@cpu) || 0;
    $stats->{mem} = percentile($percentile, \@mem) || 0;
    $stats->{maxmem} = percentile($percentile, \@maxmem) || 0;
    $stats->{maxcpu} = percentile($percentile, \@maxcpu) || 0;
    # NOTE(review): the cpu samples above are already multiplied by maxcpu,
    # and maxcpu is applied again here - confirm this is intended.
    $stats->{totalcpu} = $stats->{cpu} * $stats->{maxcpu} * 100;
    $stats->{cpu_pressure} = 0;
    return $stats;
}

# Return the element found at position int($p * last_index / 100) of the
# numerically sorted values in $aref.
#
# $p    - percentile, 0..100
# $aref - reference to an array of numbers
#
# Returns nothing (undef in scalar context) for an empty array.
sub percentile {
    my ($p, $aref) = @_;

    return if !defined($aref) || !@$aref;

    # The default sort compares as strings ("100" lt "9"); samples are
    # numbers, so an explicit numeric comparison is required.
    my @sorted = sort { $a <=> $b } @$aref;

    my $index = int($p * $#sorted / 100);
    $index = 0 if $index < 0;
    $index = $#sorted if $index > $#sorted;

    return $sorted[$index];
}

# Load the qemu config of $vmid and return a reduced copy holding 'ostype'
# (collapsed to 'windows' when PVE::QemuServer::windows_version() is true
# for it) plus every volume entry of the config.
#
# Returns nothing when the config cannot be loaded.
sub read_vm_config {
    my ($self, $vmid) = @_;

    my $conf = undef;
    my $finalconf = {};

    # Look the node up without creating an entry for an unknown vmid in the
    # list returned by PVE::Cluster::get_vmlist().
    my $vmlist = PVE::Cluster::get_vmlist() || {};
    my $idlist = $vmlist->{ids} || {};
    my $node = $idlist->{$vmid} ? $idlist->{$vmid}->{node} : undef;

    # best effort: a config that cannot be loaded yields no result
    eval { $conf = PVE::QemuConfig->load_config($vmid, $node) };
    return if !$conf;

    if (PVE::QemuServer::windows_version($conf->{ostype})) {
        $finalconf->{ostype} = 'windows';
    } else {
        $finalconf->{ostype} = $conf->{ostype};
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        $finalconf->{$ds} = $conf->{$ds};
    });

    return $finalconf;
}

# Load the container config of $vmid and return a reduced copy holding
# every volume entry of the config.
#
# Returns nothing when the config cannot be loaded.
sub read_ct_config {
    my ($self, $vmid) = @_;

    my $conf = undef;
    my $finalconf = {};

    # Look the node up without creating an entry for an unknown vmid in the
    # list returned by PVE::Cluster::get_vmlist().
    my $vmlist = PVE::Cluster::get_vmlist() || {};
    my $idlist = $vmlist->{ids} || {};
    my $node = $idlist->{$vmid} ? $idlist->{$vmid}->{node} : undef;

    # best effort: a config that cannot be loaded yields no result
    eval { $conf = PVE::LXC::Config->load_config($vmid, $node) };
    return if !$conf;

    PVE::LXC::Config->foreach_volume($conf, sub {
        my ($ms, $mountpoint) = @_;
        $finalconf->{$ms} = $conf->{$ms};
    });

    return $finalconf;
}
# --- Methods the patch adds to src/PVE/HA/Sim/TestEnv.pm ---

# Simulated node stats: the entry stored for $node under
# $self->{hardware}->{node_stats}.
sub get_node_rrd_stats {
    my ($self, $node) = @_;

    my $per_node = $self->{hardware}->{node_stats};

    return $per_node->{$node};
}

# Simulated guest stats: the entry stored for $vmid under
# $self->{hardware}->{service_stats}, with false or missing counters
# replaced by defaults (uptime 400, everything else 0) and totalcpu
# derived from cpu and maxcpu.
sub get_vm_rrd_stats {
    my ($self, $vmid) = @_;

    my $per_service = $self->{hardware}->{service_stats};
    my $record = $per_service->{$vmid};

    $record->{uptime} ||= 400;
    $record->{$_} ||= 0 for qw(cpu mem maxmem maxcpu);
    $record->{cpu_pressure} ||= 0;
    $record->{totalcpu} = $record->{cpu} * $record->{maxcpu} * 100;

    return $record;
}

# The simulator keeps no history: offline stats are the current stats and
# $percentile is not used.
sub get_vm_offline_rrd_stats {
    my ($self, $vmid, $percentile) = @_;

    return $self->get_vm_rrd_stats($vmid);
}

# Simulated VM config: the entry stored for $vmid under
# $self->{hardware}->{vm_config}.
sub read_vm_config {
    my ($self, $vmid) = @_;

    return $self->{hardware}->{vm_config}->{$vmid};
}

# Simulated CT config: read from the same vm_config table as VMs.
sub read_ct_config {
    my ($self, $vmid) = @_;

    return $self->{hardware}->{vm_config}->{$vmid};
}

1;