From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH ha-manager v2 11/12] handle node affinity rules with failback in manual migrations
Date: Tue, 20 Jan 2026 16:27:43 +0100 [thread overview]
Message-ID: <20260120152755.499037-12-d.kral@proxmox.com> (raw)
In-Reply-To: <20260120152755.499037-1-d.kral@proxmox.com>
Do not execute any manual user migration of an HA resource to a target
node that is not one of the highest-priority nodes if the HA resource
has failback set.
This prevents users from moving an HA resource that would immediately be
failed back to a higher-priority node of the strict or non-strict node
affinity rule afterwards, which would only waste time and resources.
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
changes v1 -> v2:
- s/conf/cfg/
- use checked_resources_config() helper instead of
read_and_check_resources_config()
src/PVE/HA/Config.pm | 11 +++++--
src/PVE/HA/Helpers.pm | 6 ++--
src/PVE/HA/Manager.pm | 13 +++++---
.../test-node-affinity-nonstrict1/log.expect | 16 +---------
.../test-node-affinity-nonstrict7/log.expect | 32 +++----------------
.../test-node-affinity-strict7/log.expect | 18 ++---------
6 files changed, 27 insertions(+), 69 deletions(-)
diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm
index 7607b194..7304955a 100644
--- a/src/PVE/HA/Config.pm
+++ b/src/PVE/HA/Config.pm
@@ -399,22 +399,27 @@ sub service_is_configured {
sub get_resource_motion_info {
my ($sid) = @_;
- my $resources = read_resources_config();
+ my $cfg = read_resources_config();
my $dependent_resources = [];
my $blocking_resources_by_node = {};
- if (&$service_check_ha_state($resources, $sid)) {
+ if (&$service_check_ha_state($cfg, $sid)) {
my $manager_status = read_manager_status();
my $ss = $manager_status->{service_status};
my $ns = $manager_status->{node_status};
# get_resource_motion_info expects a hashset of all nodes with status 'online'
my $online_nodes = { map { $ns->{$_} eq 'online' ? ($_ => 1) : () } keys %$ns };
+ # get_resource_motion_info expects a resource config with defaults set
+ my $resources = checked_resources_config($cfg);
my $compiled_rules = read_and_compile_rules_config();
+ my $cd = $resources->{$sid} // {};
($dependent_resources, $blocking_resources_by_node) =
- PVE::HA::Helpers::get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules);
+ PVE::HA::Helpers::get_resource_motion_info(
+ $ss, $sid, $cd, $online_nodes, $compiled_rules,
+ );
}
return ($dependent_resources, $blocking_resources_by_node);
diff --git a/src/PVE/HA/Helpers.pm b/src/PVE/HA/Helpers.pm
index b160c541..a58b1e12 100644
--- a/src/PVE/HA/Helpers.pm
+++ b/src/PVE/HA/Helpers.pm
@@ -18,13 +18,13 @@ causes that make the node unavailable to C<$sid>.
=cut
-sub get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules) {
+sub get_resource_motion_info($ss, $sid, $cd, $online_nodes, $compiled_rules) {
my $dependent_resources = [];
my $blocking_resources_by_node = {};
my ($node_affinity, $resource_affinity) =
$compiled_rules->@{qw(node-affinity resource-affinity)};
- my ($allowed_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes);
+ my ($allowed_nodes, $pri_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes);
my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid);
for my $csid (sort keys %$together) {
@@ -35,7 +35,7 @@ sub get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules) {
}
for my $node (keys %$online_nodes) {
- if (!$allowed_nodes->{$node}) {
+ if (!$allowed_nodes->{$node} || ($cd->{failback} && !$pri_nodes->{$node})) {
push $blocking_resources_by_node->{$node}->@*,
{
sid => $sid,
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 3d3829cf..45258641 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -387,13 +387,15 @@ sub read_lrm_status {
}
sub execute_migration {
- my ($self, $cmd, $task, $sid, $target) = @_;
+ my ($self, $cmd, $task, $sid, $cd, $target) = @_;
my ($haenv, $ss, $ns, $compiled_rules) = $self->@{qw(haenv ss ns compiled_rules)};
my $online_nodes = { map { $_ => 1 } $self->{ns}->list_online_nodes()->@* };
my ($dependent_resources, $blocking_resources_by_node) =
- PVE::HA::Helpers::get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules);
+ PVE::HA::Helpers::get_resource_motion_info(
+ $ss, $sid, $cd, $online_nodes, $compiled_rules,
+ );
if (my $blocking_resources = $blocking_resources_by_node->{$target}) {
for my $blocking_resource (@$blocking_resources) {
@@ -432,7 +434,7 @@ sub execute_migration {
# read new crm commands and save them into crm master status
sub update_crm_commands {
- my ($self) = @_;
+ my ($self, $sc) = @_;
my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});
@@ -453,7 +455,8 @@ sub update_crm_commands {
"ignore crm command - service already on target node: $cmd",
);
} else {
- $self->execute_migration($cmd, $task, $sid, $node);
+ my $cd = $sc->{$sid} // {};
+ $self->execute_migration($cmd, $task, $sid, $cd, $node);
}
}
} else {
@@ -711,7 +714,7 @@ sub manage {
$self->{last_services_digest} = $services_digest;
}
- $self->update_crm_commands();
+ $self->update_crm_commands($sc);
for (;;) {
my $repeat = 0;
diff --git a/src/test/test-node-affinity-nonstrict1/log.expect b/src/test/test-node-affinity-nonstrict1/log.expect
index d86c69de..ca2c40b3 100644
--- a/src/test/test-node-affinity-nonstrict1/log.expect
+++ b/src/test/test-node-affinity-nonstrict1/log.expect
@@ -22,19 +22,5 @@ info 25 node3/lrm: status change wait_for_agent_lock => active
info 25 node3/lrm: starting service vm:101
info 25 node3/lrm: service status vm:101 started
info 120 cmdlist: execute service vm:101 migrate node2
-info 120 node1/crm: got crm command: migrate vm:101 node2
-info 120 node1/crm: migrate service 'vm:101' to node 'node2'
-info 120 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node2)
-info 123 node2/lrm: got lock 'ha_agent_node2_lock'
-info 123 node2/lrm: status change wait_for_agent_lock => active
-info 125 node3/lrm: service vm:101 - start migrate to node 'node2'
-info 125 node3/lrm: service vm:101 - end migrate to node 'node2'
-info 140 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node2)
-info 140 node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node2, target = node3)
-info 143 node2/lrm: service vm:101 - start migrate to node 'node3'
-info 143 node2/lrm: service vm:101 - end migrate to node 'node3'
-info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3)
-info 165 node3/lrm: starting service vm:101
-info 165 node3/lrm: service status vm:101 started
+err 120 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2'
info 720 hardware: exit simulation - done
diff --git a/src/test/test-node-affinity-nonstrict7/log.expect b/src/test/test-node-affinity-nonstrict7/log.expect
index 31daa618..54e824ea 100644
--- a/src/test/test-node-affinity-nonstrict7/log.expect
+++ b/src/test/test-node-affinity-nonstrict7/log.expect
@@ -28,35 +28,9 @@ info 25 node3/lrm: status change wait_for_agent_lock => active
info 25 node3/lrm: starting service vm:101
info 25 node3/lrm: service status vm:101 started
info 120 cmdlist: execute service vm:101 migrate node1
-info 120 node1/crm: got crm command: migrate vm:101 node1
-info 120 node1/crm: migrate service 'vm:101' to node 'node1'
-info 120 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node1)
-info 121 node1/lrm: got lock 'ha_agent_node1_lock'
-info 121 node1/lrm: status change wait_for_agent_lock => active
-info 125 node3/lrm: service vm:101 - start migrate to node 'node1'
-info 125 node3/lrm: service vm:101 - end migrate to node 'node1'
-info 140 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node1)
-info 140 node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node1, target = node3)
-info 141 node1/lrm: service vm:101 - start migrate to node 'node3'
-info 141 node1/lrm: service vm:101 - end migrate to node 'node3'
-info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3)
-info 165 node3/lrm: starting service vm:101
-info 165 node3/lrm: service status vm:101 started
+err 120 node1/crm: crm command 'migrate vm:101 node1' error - service 'vm:101' is not allowed on node 'node1'
info 220 cmdlist: execute service vm:101 migrate node2
-info 220 node1/crm: got crm command: migrate vm:101 node2
-info 220 node1/crm: migrate service 'vm:101' to node 'node2'
-info 220 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node2)
-info 225 node3/lrm: service vm:101 - start migrate to node 'node2'
-info 225 node3/lrm: service vm:101 - end migrate to node 'node2'
-info 240 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node2)
-info 240 node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info 240 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node2, target = node3)
-info 243 node2/lrm: service vm:101 - start migrate to node 'node3'
-info 243 node2/lrm: service vm:101 - end migrate to node 'node3'
-info 260 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3)
-info 265 node3/lrm: starting service vm:101
-info 265 node3/lrm: service status vm:101 started
+err 220 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2'
info 320 cmdlist: execute service vm:101 migrate node3
info 320 node1/crm: ignore crm command - service already on target node: migrate vm:101 node3
info 420 cmdlist: execute service vm:102 migrate node3
@@ -81,6 +55,8 @@ info 620 cmdlist: execute service vm:102 migrate node1
info 620 node1/crm: got crm command: migrate vm:102 node1
info 620 node1/crm: migrate service 'vm:102' to node 'node1'
info 620 node1/crm: service 'vm:102': state changed from 'started' to 'migrate' (node = node2, target = node1)
+info 621 node1/lrm: got lock 'ha_agent_node1_lock'
+info 621 node1/lrm: status change wait_for_agent_lock => active
info 623 node2/lrm: service vm:102 - start migrate to node 'node1'
info 623 node2/lrm: service vm:102 - end migrate to node 'node1'
info 640 node1/crm: service 'vm:102': state changed from 'migrate' to 'started' (node = node1)
diff --git a/src/test/test-node-affinity-strict7/log.expect b/src/test/test-node-affinity-strict7/log.expect
index 9c4e9f0b..ae8e43fb 100644
--- a/src/test/test-node-affinity-strict7/log.expect
+++ b/src/test/test-node-affinity-strict7/log.expect
@@ -28,21 +28,7 @@ info 25 node3/lrm: status change wait_for_agent_lock => active
info 25 node3/lrm: starting service vm:101
info 25 node3/lrm: service status vm:101 started
info 120 cmdlist: execute service vm:101 migrate node1
-info 120 node1/crm: got crm command: migrate vm:101 node1
-info 120 node1/crm: migrate service 'vm:101' to node 'node1'
-info 120 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node1)
-info 121 node1/lrm: got lock 'ha_agent_node1_lock'
-info 121 node1/lrm: status change wait_for_agent_lock => active
-info 125 node3/lrm: service vm:101 - start migrate to node 'node1'
-info 125 node3/lrm: service vm:101 - end migrate to node 'node1'
-info 140 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node1)
-info 140 node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node1, target = node3)
-info 141 node1/lrm: service vm:101 - start migrate to node 'node3'
-info 141 node1/lrm: service vm:101 - end migrate to node 'node3'
-info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3)
-info 165 node3/lrm: starting service vm:101
-info 165 node3/lrm: service status vm:101 started
+err 120 node1/crm: crm command 'migrate vm:101 node1' error - service 'vm:101' is not allowed on node 'node1'
info 220 cmdlist: execute service vm:101 migrate node2
err 220 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2'
info 320 cmdlist: execute service vm:101 migrate node3
@@ -55,6 +41,8 @@ info 620 cmdlist: execute service vm:102 migrate node1
info 620 node1/crm: got crm command: migrate vm:102 node1
info 620 node1/crm: migrate service 'vm:102' to node 'node1'
info 620 node1/crm: service 'vm:102': state changed from 'started' to 'migrate' (node = node2, target = node1)
+info 621 node1/lrm: got lock 'ha_agent_node1_lock'
+info 621 node1/lrm: status change wait_for_agent_lock => active
info 623 node2/lrm: service vm:102 - start migrate to node 'node1'
info 623 node2/lrm: service vm:102 - end migrate to node 'node1'
info 640 node1/crm: service 'vm:102': state changed from 'migrate' to 'started' (node = node1)
--
2.47.3
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2026-01-20 15:29 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-20 15:27 [pve-devel] [PATCH-SERIES container/ha-manager/manager/qemu-server v2 00/15] HA node affinity blockers (#1497) Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 01/12] ha: put source files on individual new lines Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 02/12] d/pve-ha-manager.install: remove duplicate Config.pm Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 03/12] config: group and sort use statements Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 04/12] manager: " Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 05/12] manager: report all reasons when resources are blocked from migration Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 06/12] config, manager: factor out resource motion info logic Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 07/12] tests: add test cases for migrating resources with node affinity rules Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 08/12] handle strict node affinity rules in manual migrations Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 09/12] config: improve variable names in read_and_check_resources_config Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 10/12] config: factor out checked_resources_config helper Daniel Kral
2026-01-20 15:27 ` Daniel Kral [this message]
2026-01-20 15:27 ` [pve-devel] [PATCH ha-manager v2 12/12] config: remove duplicate config reads in get_resource_motion_info Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH qemu-server v2 1/1] api: migration preconditions: add node affinity as blocking cause Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH container " Daniel Kral
2026-01-20 15:27 ` [pve-devel] [PATCH manager v2 1/1] ui: migrate: display precondition messages for ha node affinity Daniel Kral
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260120152755.499037-12-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox