From: Alexandre Derumier <aderumier@odiso.com>
To: pve-devel@lists.proxmox.com
Date: Wed, 27 Apr 2022 17:33:50 +0200
Message-Id: <20220427153351.1773666-8-aderumier@odiso.com>
X-Mailer: git-send-email 2.30.2
In-Reply-To: <20220427153351.1773666-1-aderumier@odiso.com>
References: <20220427153351.1773666-1-aderumier@odiso.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [pve-devel] [PATCH pve-ha-manager 7/8] add affinity

Compute an affinity score per service from the resources groups
configuration: with 'separate' affinity, every other group member
running on the same node adds one point; with 'group' affinity, every
member running on a different node adds one point.

Services with a non-zero score become candidates for re-balancing, and
$check_target_load now filters out target nodes where the service would
keep a non-zero score greater than or equal to its current one.

This replaces the unimplemented $check_anti_affinity stub.
---
 src/PVE/HA/Balancer/Nodes.pm    | 36 +++++++++++++++--------------
 src/PVE/HA/Balancer/Services.pm | 40 +++++++++++++++++++++++----------
 src/PVE/HA/Manager.pm           | 24 ++++++++++++++++----
 3 files changed, 67 insertions(+), 33 deletions(-)
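As an illustration of the scoring rule implemented by compute_affinity()
in Balancer/Services.pm below, here is a minimal standalone sketch. The
services, nodes and group ('vm:100'..'vm:102', 'node1'..'node3',
'group1') are made up for the example; only the counting rule is taken
from the patch.

#!/usr/bin/perl
use strict;
use warnings;

# Hypothetical cluster state: which node each service currently runs on.
my $ss = {
    'vm:100' => { node => 'node1' },
    'vm:101' => { node => 'node1' },
    'vm:102' => { node => 'node2' },
};

# One resources group with 'separate' affinity: members should not share a node.
my $groups_resources = {
    group1 => {
        affinity  => 'separate',
        resources => { 'vm:100' => 1, 'vm:101' => 1, 'vm:102' => 1 },
    },
};

# Reverse map, as built by get_resources_groups(): sid => { groupid => 1 }.
my $resources_groups = {
    'vm:100' => { group1 => 1 },
    'vm:101' => { group1 => 1 },
    'vm:102' => { group1 => 1 },
};

# Same rule as compute_affinity(): count the other group members that would
# violate the group's affinity if $sid were placed on $node.
sub compute_affinity {
    my ($sid, $node) = @_;

    my $affinity_score = 0;
    foreach my $groupid (keys %{$resources_groups->{$sid} // {}}) {
        my $affinity = $groups_resources->{$groupid}->{affinity};
        next if !$affinity;
        my $resources = $groups_resources->{$groupid}->{resources};
        foreach my $othersid (keys %$resources) {
            next if $othersid eq $sid;
            my $other_service_node = $ss->{$othersid}->{node};
            $affinity_score++ if ($affinity eq 'separate' && $other_service_node eq $node) ||
                ($affinity eq 'group' && $other_service_node ne $node);
        }
    }
    return $affinity_score;
}

printf "vm:100 on %s => score %d\n", $_, compute_affinity('vm:100', $_)
    for qw(node1 node2 node3);

This prints a score of 1 for node1 (vm:101 already runs there), 1 for
node2 (vm:102) and 0 for node3, so with the new >= filter in
$check_target_load only node3 would survive the affinity check.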
diff --git a/src/PVE/HA/Balancer/Nodes.pm b/src/PVE/HA/Balancer/Nodes.pm
index a06ed62..bce4c38 100644
--- a/src/PVE/HA/Balancer/Nodes.pm
+++ b/src/PVE/HA/Balancer/Nodes.pm
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 use PVE::HA::Balancer::Topsis;
 use PVE::HA::Balancer::AHP;
-
+use PVE::HA::Balancer::Services;
 
 my $compute_node_target_cpu_pct = sub{
     my ($node_stats, $vm_stats) = @_;
@@ -21,19 +21,16 @@ my $compute_node_target_mem_pct = sub {
 };
 
 my $add_prio = sub {
-    my ($self, $sd, $nodename, $group_members_prio) = @_;
-
-    my $vm_stats = $sd->{stats};
-    my $node_stats = $self->{online_node_stats}->{$nodename}->{stats};
+    my ($self, $nodename, $group_members_prio, $target_stats) = @_;
 
     my $node = {};
     $node->{prio} = $group_members_prio->{$nodename};
-    $node->{affinity} = 0; #fixme, need to implement vm group
+    $node->{affinity} = $target_stats->{affinity} || 0;
     $node->{online_node_usage} = $self->{online_node_usage}->{$nodename};
     $node->{name} = $nodename;
     $node->{cpu_pressure} = 0; #fixme, need to stream rrd graph first
-    $node->{target_cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
-    $node->{target_mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+    $node->{target_cpu_pct} = $target_stats->{cpu_pct} || 0;
+    $node->{target_mem_pct} = $target_stats->{mem_pct} || 0;
 
     return $node;
 };
@@ -80,7 +77,7 @@ my $check_cpumodel_compatibility = sub {
 };
 
 my $check_target_load = sub {
-    my ($self, $sd, $node) = @_;
+    my ($self, $sid, $sd, $node, $target_stats) = @_;
 
     return 1 if !$self->{balancer}->{enabled};
 
@@ -91,8 +88,8 @@ my $check_target_load = sub {
     # if ksm sharing is already huge (20% of total memory), reduce mem threshold to 75%
     $max_threshold->{mem} = 75 if $node_stats->{ksm} > $node_stats->{maxmem} * 0.2;
 
-    my $target_mem_percent = &$compute_node_target_mem_pct($node_stats, $vm_stats);
-    return if $target_mem_percent > $max_threshold->{mem};
+    $target_stats->{mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+    return if $target_stats->{mem_pct} > $max_threshold->{mem};
 
     #don't use node if already too much global pressure (some cores are already more than 100%, so we can't trust cpu average)
     return if $node_stats->{cpu_pressure} > $max_threshold->{cpu_pressure};
@@ -100,8 +97,12 @@ my $check_target_load = sub {
     #don't use node if a vm is already overloaded on this node
    return if $node_stats->{max_vm_pressure} > $max_threshold->{vm_pressure};
 
-    my $target_cpu_percent = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
-    return if $target_cpu_percent > $max_threshold->{cpu};
+    $target_stats->{cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
+    return if $target_stats->{cpu_pct} > $max_threshold->{cpu};
+
+    #don't use the node if its affinity score would be greater than or equal to the current one
+    $target_stats->{affinity} = PVE::HA::Balancer::Services::compute_affinity($self, $sid, $node);
+    return if defined($vm_stats->{affinity}) && $target_stats->{affinity} != 0 && $target_stats->{affinity} >= $vm_stats->{affinity};
 
     return 1;
 };
@@ -131,7 +132,7 @@ my $check_hard_constraints = sub {
 };
 
 sub find_target {
-    my($self, $cd, $sd, $group_members_prio) = @_;
+    my($self, $sid, $cd, $sd, $group_members_prio) = @_;
 
     my $online_nodes = $self->{online_node_stats};
 
@@ -142,11 +143,12 @@ sub find_target {
        #### FILTERING NODES WITH HARD CONSTRAINTS (vm can't be started)
        next if !&$check_hard_constraints($self, $sd, $node, $group_members_prio);
 
-       ### FILTERING too much loaded nodes
-       next if !&$check_target_load($self,$sd, $node);
+       ### FILTERING out overloaded nodes and computing target stats
+       my $target_stats = {};
+       next if !&$check_target_load($self, $sid, $sd, $node, $target_stats);
 
        #### compute differents prio
-       $target_nodes->{$node} = &$add_prio($self, $sd, $node, $group_members_prio);
+       $target_nodes->{$node} = &$add_prio($self, $node, $group_members_prio, $target_stats);
     }
 
     # if ressource aware is enabled, order by score
diff --git a/src/PVE/HA/Balancer/Services.pm b/src/PVE/HA/Balancer/Services.pm
index 6cce6a7..d095b67 100644
--- a/src/PVE/HA/Balancer/Services.pm
+++ b/src/PVE/HA/Balancer/Services.pm
@@ -5,13 +5,30 @@ use warnings;
 use PVE::HA::Balancer::Topsis;
 use PVE::HA::Balancer::AHP;
 
-my $check_anti_affinity = sub {
-    my ($vmid, $node, $vm_stats) = @_;
-
-    #implement me
+sub compute_affinity {
+    my ($self, $sid, $node) = @_;
+
+    my $groups_resources = $self->{groups_resources};
+    my $resources_groups = $self->{resources_groups};
+    my $ss = $self->{ss};
+
+    my $affinity_score = 0;
+
+    my $resource_groups = $resources_groups->{$sid};
+    foreach my $groupid (keys %$resource_groups) {
+        my $affinity = $groups_resources->{$groupid}->{affinity};
+        next if !$affinity;
+        my $resources = $groups_resources->{$groupid}->{resources};
+        foreach my $othersid (keys %$resources) {
+            next if $othersid eq $sid;
+            my $other_service_node = $ss->{$othersid}->{node};
+            $affinity_score++ if ($affinity eq 'separate' && $other_service_node eq $node) ||
+                ($affinity eq 'group' && $other_service_node ne $node);
+        }
+    }
 
-    return undef;
-};
+    return $affinity_score;
+}
 
 my $check_cpu_pressure = sub {
     my ($vm_stats) = @_;
@@ -43,7 +60,7 @@ my $check_pseudo_mem_pressure = sub {
 };
 
 my $get_bad_vms = sub {
-    my($ss, $sc, $online_nodes) = @_;
+    my($self, $ss, $sc, $online_nodes) = @_;
 
     my $bad_vms = {};
 
@@ -67,7 +84,6 @@ my $get_bad_vms = sub {
             my $node_stats = $online_nodes->{$node}->{stats};
             my $vm_stats = $sd->{stats};
 
-
             # skip vm is recently started or migrated
             next if !defined($vm_stats->{uptime}) || $vm_stats->{uptime} < 300;
 
@@ -77,12 +93,12 @@ my $get_bad_vms = sub {
 
             #PVE::QemuServer::check_local_resources($vmconf, 1);
 
-            $vm_stats->{affinity} = 0;
             $vm_stats->{mem_pseudo_pressure} = 0;
+            $vm_stats->{affinity} = compute_affinity($self, $sid, $node);
 
             my $add_vm = undef;
-            $add_vm = 1 if &$check_anti_affinity($sid, $node, $vm_stats);
+            $add_vm = 1 if $vm_stats->{affinity};
             $add_vm = 1 if &$check_cpu_pressure($vm_stats);
             $add_vm = 1 if &$check_pseudo_mem_pressure($node_stats, $vm_stats);
             next if !$add_vm;
 
@@ -115,7 +131,7 @@ sub get_vm_targetnode {
 
     my $online_nodes = $self->{online_node_stats};
 
-    my $bad_vms = &$get_bad_vms($ss, $sc, $online_nodes);
+    my $bad_vms = &$get_bad_vms($self, $ss, $sc, $online_nodes);
     return if !$bad_vms;
 
     my $vm_scores = &$get_score($self, $bad_vms);
@@ -128,7 +144,7 @@ sub get_vm_targetnode {
         my $cd = $sc->{$sid};
         my $sd = $ss->{$sid};
 
-        my $node = $self->find_node_target($cd , $sd);
+        my $node = $self->find_node_target($sid, $cd, $sd);
         next if !$node;
 
         # register last sid we tried to migrate, to not try to balance it in loop
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 4e318bd..03b0520 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -371,7 +371,7 @@ sub manage {
 
     my $sc = $haenv->read_service_config();
     $self->{groups} = $haenv->read_group_config(); # update
-    $self->{vmgroups} = $haenv->read_vmgroup_config();
+    $self->get_resources_groups();
 
     # compute new service status
 
@@ -833,7 +833,7 @@ sub next_state_recovery {
 
     $self->recompute_online_node_usage(); # we want the most current node state
 
-    my $recovery_node = $self->find_node_target($cd , $sd);
+    my $recovery_node = $self->find_node_target($sid, $cd, $sd);
 
     if ($recovery_node) {
         my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
@@ -871,13 +871,13 @@ sub next_state_recovery {
 }
 
 sub find_node_target {
-    my($self, $cd, $sd) = @_;
+    my($self, $sid, $cd, $sd) = @_;
 
     my $online_nodes = $self->{online_node_stats};
     my $groups = $self->{groups};
     my $hagroup = get_service_group($groups, $online_nodes, $cd);
     my ($pri_groups, $group_members_prio) = get_node_priority_groups($hagroup, $online_nodes);
-    return PVE::HA::Balancer::Nodes::find_target($self, $cd, $sd, $group_members_prio);
+    return PVE::HA::Balancer::Nodes::find_target($self, $sid, $cd, $sd, $group_members_prio);
 }
 
 sub loadbalance {
@@ -917,4 +917,20 @@ sub balancer_status {
     $self->{balancer}->{enabled} = $dc_ha_cfg->{balancer};
 }
 
+sub get_resources_groups {
+    my ($self) = @_;
+
+    my $resources_groups_config = $self->{haenv}->read_resources_groups_config();
+    my $groups_resources = $resources_groups_config->{ids};
+    my $resources_groups = {};
+    foreach my $groupid (keys %$groups_resources) {
+        my $resources = $groups_resources->{$groupid}->{resources};
+        foreach my $sid (keys %$resources) {
+            $resources_groups->{$sid}->{$groupid} = 1;
+        }
+    }
+    $self->{resources_groups} = $resources_groups;
+    $self->{groups_resources} = $groups_resources;
+}
+
 1;
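For reference, a sketch of the inversion done by get_resources_groups()
above, run on a hypothetical two-group config; the group ids and sids
are made up, only the hash shape (read_resources_groups_config()->{ids})
follows the patch.

use strict;
use warnings;
use Data::Dumper;

# Hypothetical group config, shaped like read_resources_groups_config()->{ids}.
my $groups_resources = {
    group1 => { affinity => 'separate', resources => { 'vm:100' => 1, 'vm:101' => 1 } },
    group2 => { affinity => 'group',    resources => { 'vm:101' => 1, 'ct:200' => 1 } },
};

# Same inversion as get_resources_groups(): sid => { groupid => 1 }.
my $resources_groups = {};
foreach my $groupid (keys %$groups_resources) {
    my $resources = $groups_resources->{$groupid}->{resources};
    foreach my $sid (keys %$resources) {
        $resources_groups->{$sid}->{$groupid} = 1;
    }
}

print Dumper($resources_groups);
# 'vm:101' ends up in both group1 and group2, so compute_affinity() will
# apply both the 'separate' and the 'group' rule when scoring it.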
-- 
2.30.2