From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH ha-manager v4 11/19] manager: apply node affinity rules when selecting service nodes
Date: Tue, 29 Jul 2025 20:00:51 +0200 [thread overview]
Message-ID: <20250729180107.428855-12-d.kral@proxmox.com> (raw)
In-Reply-To: <20250729180107.428855-1-d.kral@proxmox.com>
Replace the HA group mechanism with the functionally equivalent node
affinity rules' get_node_affinity(...), which enforces the node affinity
rules defined in the rules config.
This allows the $groups parameter to be replaced with the $rules
parameter in select_service_node(...) as all behavior of the HA groups
is now encoded in $service_conf and $rules.
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
src/PVE/HA/Manager.pm | 83 ++++++--------------------------
src/PVE/HA/Rules/NodeAffinity.pm | 83 ++++++++++++++++++++++++++++++++
src/test/test_failover1.pl | 16 ++++--
3 files changed, 110 insertions(+), 72 deletions(-)
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 148447d6..43572531 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -10,7 +10,7 @@ use PVE::HA::Groups;
use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
use PVE::HA::Rules;
-use PVE::HA::Rules::NodeAffinity;
+use PVE::HA::Rules::NodeAffinity qw(get_node_affinity);
use PVE::HA::Usage::Basic;
use PVE::HA::Usage::Static;
@@ -114,57 +114,13 @@ sub flush_master_status {
$haenv->write_manager_status($ms);
}
-sub get_service_group {
- my ($groups, $online_node_usage, $service_conf) = @_;
-
- my $group = {};
- # add all online nodes to default group to allow try_next when no group set
- $group->{nodes}->{$_} = 1 for $online_node_usage->list_nodes();
-
- # overwrite default if service is bound to a specific group
- if (my $group_id = $service_conf->{group}) {
- $group = $groups->{ids}->{$group_id} if $groups->{ids}->{$group_id};
- }
-
- return $group;
-}
-
-# groups available nodes with their priority as group index
-sub get_node_priority_groups {
- my ($group, $online_node_usage) = @_;
-
- my $pri_groups = {};
- my $group_members = {};
- foreach my $entry (keys %{ $group->{nodes} }) {
- my ($node, $pri) = ($entry, 0);
- if ($entry =~ m/^(\S+):(\d+)$/) {
- ($node, $pri) = ($1, $2);
- }
- next if !$online_node_usage->contains_node($node); # offline
- $pri_groups->{$pri}->{$node} = 1;
- $group_members->{$node} = $pri;
- }
-
- # add non-group members to unrestricted groups (priority -1)
- if (!$group->{restricted}) {
- my $pri = -1;
- for my $node ($online_node_usage->list_nodes()) {
- next if defined($group_members->{$node});
- $pri_groups->{$pri}->{$node} = 1;
- $group_members->{$node} = -1;
- }
- }
-
- return ($pri_groups, $group_members);
-}
-
=head3 select_service_node(...)
-=head3 select_service_node($groups, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
+=head3 select_service_node($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
Used to select the best fitting node for the service C<$sid>, with the
-configuration C<$service_conf> and state C<$sd>, according to the groups defined
-in C<$groups>, available node utilization in C<$online_node_usage>, and the
+configuration C<$service_conf> and state C<$sd>, according to the rules defined
+in C<$rules>, available node utilization in C<$online_node_usage>, and the
given C<$node_preference>.
The C<$node_preference> can be set to:
@@ -182,7 +138,7 @@ The C<$node_preference> can be set to:
=cut
sub select_service_node {
- my ($groups, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
+ my ($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
die "'$node_preference' is not a valid node_preference for select_service_node\n"
if $node_preference !~ m/(none|best-score|try-next)/;
@@ -190,42 +146,35 @@ sub select_service_node {
my ($current_node, $tried_nodes, $maintenance_fallback) =
$sd->@{qw(node failed_nodes maintenance_node)};
- my $group = get_service_group($groups, $online_node_usage, $service_conf);
+ my ($allowed_nodes, $pri_nodes) = get_node_affinity($rules, $sid, $online_node_usage);
- my ($pri_groups, $group_members) = get_node_priority_groups($group, $online_node_usage);
-
- my @pri_list = sort { $b <=> $a } keys %$pri_groups;
- return undef if !scalar(@pri_list);
+ return undef if !%$pri_nodes;
# stay on current node if possible (avoids random migrations)
if (
$node_preference eq 'none'
- && $group->{nofailback}
- && defined($group_members->{$current_node})
+ && !$service_conf->{failback}
+ && $allowed_nodes->{$current_node}
) {
return $current_node;
}
- # select node from top priority node list
-
- my $top_pri = $pri_list[0];
-
# try to avoid nodes where the service failed already if we want to relocate
if ($node_preference eq 'try-next') {
foreach my $node (@$tried_nodes) {
- delete $pri_groups->{$top_pri}->{$node};
+ delete $pri_nodes->{$node};
}
}
return $maintenance_fallback
- if defined($maintenance_fallback) && $pri_groups->{$top_pri}->{$maintenance_fallback};
+ if defined($maintenance_fallback) && $pri_nodes->{$maintenance_fallback};
- return $current_node if $node_preference eq 'none' && $pri_groups->{$top_pri}->{$current_node};
+ return $current_node if $node_preference eq 'none' && $pri_nodes->{$current_node};
my $scores = $online_node_usage->score_nodes_to_start_service($sid, $current_node);
my @nodes = sort {
$scores->{$a} <=> $scores->{$b} || $a cmp $b
- } keys %{ $pri_groups->{$top_pri} };
+ } keys %$pri_nodes;
my $found;
for (my $i = scalar(@nodes) - 1; $i >= 0; $i--) {
@@ -843,7 +792,7 @@ sub next_state_request_start {
if ($self->{crs}->{rebalance_on_request_start}) {
my $selected_node = select_service_node(
- $self->{groups},
+ $self->{rules},
$self->{online_node_usage},
$sid,
$cd,
@@ -1010,7 +959,7 @@ sub next_state_started {
}
my $node = select_service_node(
- $self->{groups},
+ $self->{rules},
$self->{online_node_usage},
$sid,
$cd,
@@ -1128,7 +1077,7 @@ sub next_state_recovery {
$self->recompute_online_node_usage(); # we want the most current node state
my $recovery_node = select_service_node(
- $self->{groups},
+ $self->{rules},
$self->{online_node_usage},
$sid,
$cd,
diff --git a/src/PVE/HA/Rules/NodeAffinity.pm b/src/PVE/HA/Rules/NodeAffinity.pm
index 2b3d7390..03313997 100644
--- a/src/PVE/HA/Rules/NodeAffinity.pm
+++ b/src/PVE/HA/Rules/NodeAffinity.pm
@@ -12,8 +12,13 @@ use PVE::Tools;
use PVE::HA::Rules;
use PVE::HA::Tools;
+use base qw(Exporter);
use base qw(PVE::HA::Rules);
+our @EXPORT_OK = qw(
+ get_node_affinity
+);
+
=head1 NAME
PVE::HA::Rules::NodeAffinity
@@ -210,4 +215,82 @@ __PACKAGE__->register_check(
},
);
+=head1 NODE AFFINITY RULE HELPERS
+
+=cut
+
+my $get_resource_node_affinity_rule = sub {
+ my ($rules, $sid) = @_;
+
+ # with the current restriction a resource can only be in one node affinity rule
+ my $node_affinity_rule;
+ PVE::HA::Rules::foreach_rule(
+ $rules,
+ sub {
+ my ($rule) = @_;
+
+ $node_affinity_rule = dclone($rule) if !$node_affinity_rule;
+ },
+ {
+ sid => $sid,
+ type => 'node-affinity',
+ exclude_disabled_rules => 1,
+ },
+ );
+
+ return $node_affinity_rule;
+};
+
+=head3 get_node_affinity($rules, $sid, $online_node_usage)
+
+Returns a list of two hashes representing the node affinity of C<$sid>
+according to the node affinity rules in C<$rules> and the available nodes in
+C<$online_node_usage>.
+
+The first hash is a hash set of available nodes, i.e. nodes where the
+resource C<$sid> is allowed to be assigned to, and the second hash is a hash set
+of preferred nodes, i.e. nodes where the resource C<$sid> should be assigned to.
+
+If there are no available nodes at all, returns C<undef>.
+
+=cut
+
+sub get_node_affinity : prototype($$$) {
+ my ($rules, $sid, $online_node_usage) = @_;
+
+ my $node_affinity_rule = $get_resource_node_affinity_rule->($rules, $sid);
+
+ # default to a node affinity rule with all available nodes
+ if (!$node_affinity_rule) {
+ for my $node ($online_node_usage->list_nodes()) {
+ $node_affinity_rule->{nodes}->{$node} = { priority => 0 };
+ }
+ }
+
+ # add remaining nodes with low priority for non-strict node affinity rules
+ if (!$node_affinity_rule->{strict}) {
+ for my $node ($online_node_usage->list_nodes()) {
+ next if defined($node_affinity_rule->{nodes}->{$node});
+
+ $node_affinity_rule->{nodes}->{$node} = { priority => -1 };
+ }
+ }
+
+ my $allowed_nodes = {};
+ my $prioritized_nodes = {};
+
+ while (my ($node, $props) = each %{ $node_affinity_rule->{nodes} }) {
+ next if !$online_node_usage->contains_node($node); # node is offline
+
+ $allowed_nodes->{$node} = 1;
+ $prioritized_nodes->{ $props->{priority} }->{$node} = 1;
+ }
+
+ my $preferred_nodes = {};
+ my $highest_priority = (sort { $b <=> $a } keys %$prioritized_nodes)[0];
+ $preferred_nodes = $prioritized_nodes->{$highest_priority} if defined($highest_priority);
+
+ return ($allowed_nodes, $preferred_nodes);
+}
+
1;
diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl
index f6faa386..78a001eb 100755
--- a/src/test/test_failover1.pl
+++ b/src/test/test_failover1.pl
@@ -4,12 +4,19 @@ use strict;
use warnings;
use lib '..';
-use PVE::HA::Groups;
use PVE::HA::Manager;
use PVE::HA::Usage::Basic;
-my $groups = PVE::HA::Groups->parse_config("groups.tmp", <<EOD);
-group: prefer_node1
+use PVE::HA::Rules;
+use PVE::HA::Rules::NodeAffinity;
+
+PVE::HA::Rules::NodeAffinity->register();
+
+PVE::HA::Rules->init(property_isolation => 1);
+
+my $rules = PVE::HA::Rules->parse_config("rules.tmp", <<EOD);
+node-affinity: prefer_node1
+ resources vm:111
nodes node1
EOD
@@ -21,7 +28,6 @@ $online_node_usage->add_node("node3");
my $service_conf = {
node => 'node1',
- group => 'prefer_node1',
failback => 1,
};
@@ -37,7 +43,7 @@ sub test {
my $select_node_preference = $try_next ? 'try-next' : 'none';
my $node = PVE::HA::Manager::select_service_node(
- $groups, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
+ $rules, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
);
my (undef, undef, $line) = caller();
--
2.47.2
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-07-29 18:02 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-29 18:00 [pve-devel] [PATCH docs/ha-manager/manager v4 00/25] HA Rules Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 01/19] tree-wide: make arguments for select_service_node explicit Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 02/19] manager: improve signature of select_service_node Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 03/19] introduce rules base plugin Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 04/19] rules: introduce node affinity rule plugin Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 05/19] config, env, hw: add rules read and parse methods Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 06/19] config: delete services from rules if services are deleted from config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 07/19] manager: read and update rules config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 08/19] test: ha tester: add test cases for future node affinity rules Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 09/19] resources: introduce failback property in ha resource config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 10/19] manager: migrate ha groups to node affinity rules in-memory Daniel Kral
2025-07-29 18:00 ` Daniel Kral [this message]
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 12/19] test: add test cases for rules config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 13/19] api: introduce ha rules api endpoints Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 14/19] cli: expose ha rules api endpoints to ha-manager cli Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 15/19] sim: do not create default groups for test cases Daniel Kral
2025-07-30 10:01 ` Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 16/19] test: ha tester: migrate groups to service and rules config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 17/19] test: ha tester: replace any reference to groups with node affinity rules Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 18/19] env: add property delete for update_service_config Daniel Kral
2025-07-29 18:00 ` [pve-devel] [PATCH ha-manager v4 19/19] manager: persistently migrate ha groups to ha rules Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH docs v4 1/2] ha: add documentation about ha rules and ha node affinity rules Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH docs v4 2/2] ha: crs: add effects of ha node affinity rule on the crs scheduler Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH manager v4 1/4] api: ha: add ha rules api endpoints Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH manager v4 2/4] ui: ha: remove ha groups from ha resource components Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH manager v4 3/4] ui: ha: show failback flag in resources status view Daniel Kral
2025-07-29 18:01 ` [pve-devel] [PATCH manager v4 4/4] ui: ha: replace ha groups with ha node affinity rules Daniel Kral
2025-07-30 17:29 ` [pve-devel] [PATCH docs/ha-manager/manager v4 00/25] HA Rules Michael Köppl
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250729180107.428855-12-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.