From: Daniel Kral <d.kral@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH ha-manager v2 02/26] manager: improve signature of select_service_node
Date: Fri, 20 Jun 2025 16:31:14 +0200 [thread overview]
Message-ID: <20250620143148.218469-7-d.kral@proxmox.com> (raw)
In-Reply-To: <20250620143148.218469-1-d.kral@proxmox.com>
The signature of select_service_node(...) has already become rather long.
Make it more compact by retrieving service- and affinity-related data
directly from the service state in $sd, and introduce a $mode parameter
to distinguish the behaviors previously selected by $try_next and
$best_scored, which were already mutually exclusive.
Signed-off-by: Daniel Kral <d.kral@proxmox.com>
---
changes since v1:
- NEW!
src/PVE/HA/Manager.pm | 87 +++++++++++++++++++++-----------------
src/test/test_failover1.pl | 17 +++-----
2 files changed, 53 insertions(+), 51 deletions(-)
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 85f2b1a..85bb114 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -149,18 +149,41 @@ sub get_node_priority_groups {
return ($pri_groups, $group_members);
}
+=head3 select_service_node(...)
+
+=head3 select_service_node($groups, $online_node_usage, $sid, $service_conf, $sd, $mode)
+
+Used to select the best fitting node for the service C<$sid>, with the
+configuration C<$service_conf> and state C<$sd>, according to the groups defined
+in C<$groups>, available node utilization in C<$online_node_usage>, and the
+given C<$mode>.
+
+The C<$mode> can be set to:
+
+=over
+
+=item C<'none'>
+
+Try to stay on the current node as much as possible.
+
+=item C<'best-score'>
+
+Try to select the best-scored node.
+
+=item C<'try-next'>
+
+Try to select the best-scored node, which is not in C<< $sd->{failed_nodes} >>,
+while trying to stay on the current node.
+
+=back
+
+=cut
+
sub select_service_node {
- my (
- $groups,
- $online_node_usage,
- $sid,
- $service_conf,
- $current_node,
- $try_next,
- $tried_nodes,
- $maintenance_fallback,
- $best_scored,
- ) = @_;
+ my ($groups, $online_node_usage, $sid, $service_conf, $sd, $mode) = @_;
+
+ my ($current_node, $tried_nodes, $maintenance_fallback) =
+ $sd->@{qw(node failed_nodes maintenance_node)};
my $group = get_service_group($groups, $online_node_usage, $service_conf);
@@ -170,11 +193,7 @@ sub select_service_node {
return undef if !scalar(@pri_list);
# stay on current node if possible (avoids random migrations)
- if (
- (!$try_next && !$best_scored)
- && $group->{nofailback}
- && defined($group_members->{$current_node})
- ) {
+ if ($mode eq 'none' && $group->{nofailback} && defined($group_members->{$current_node})) {
return $current_node;
}
@@ -183,7 +202,7 @@ sub select_service_node {
my $top_pri = $pri_list[0];
# try to avoid nodes where the service failed already if we want to relocate
- if ($try_next) {
+ if ($mode eq 'try-next') {
foreach my $node (@$tried_nodes) {
delete $pri_groups->{$top_pri}->{$node};
}
@@ -192,8 +211,7 @@ sub select_service_node {
return $maintenance_fallback
if defined($maintenance_fallback) && $pri_groups->{$top_pri}->{$maintenance_fallback};
- return $current_node
- if (!$try_next && !$best_scored) && $pri_groups->{$top_pri}->{$current_node};
+ return $current_node if $mode eq 'none' && $pri_groups->{$top_pri}->{$current_node};
my $scores = $online_node_usage->score_nodes_to_start_service($sid, $current_node);
my @nodes = sort {
@@ -208,8 +226,8 @@ sub select_service_node {
}
}
- if ($try_next) {
- if (!$best_scored && defined($found) && ($found < (scalar(@nodes) - 1))) {
+ if ($mode eq 'try-next') {
+ if (defined($found) && ($found < (scalar(@nodes) - 1))) {
return $nodes[$found + 1];
} else {
return $nodes[0];
@@ -797,11 +815,8 @@ sub next_state_request_start {
$self->{online_node_usage},
$sid,
$cd,
- $sd->{node},
- 0, # try_next
- $sd->{failed_nodes},
- $sd->{maintenance_node},
- 1, # best_score
+ $sd,
+ 'best-score',
);
my $select_text = $selected_node ne $current_node ? 'new' : 'current';
$haenv->log(
@@ -901,7 +916,7 @@ sub next_state_started {
} else {
- my $try_next = 0;
+ my $select_mode = 'none';
if ($lrm_res) {
@@ -932,7 +947,7 @@ sub next_state_started {
if (scalar(@{ $sd->{failed_nodes} }) <= $cd->{max_relocate}) {
# tell select_service_node to relocate if possible
- $try_next = 1;
+ $select_mode = 'try-next';
$haenv->log(
'warning',
@@ -967,11 +982,8 @@ sub next_state_started {
$self->{online_node_usage},
$sid,
$cd,
- $sd->{node},
- $try_next,
- $sd->{failed_nodes},
- $sd->{maintenance_node},
- 0, # best_score
+ $sd,
+ $select_mode,
);
if ($node && ($sd->{node} ne $node)) {
@@ -1009,7 +1021,7 @@ sub next_state_started {
);
}
} else {
- if ($try_next && !defined($node)) {
+ if ($select_mode eq 'try-next' && !defined($node)) {
$haenv->log(
'warning',
"Start Error Recovery: Tried all available nodes for service '$sid', retry"
@@ -1088,11 +1100,8 @@ sub next_state_recovery {
$self->{online_node_usage},
$sid,
$cd,
- $sd->{node},
- 0, # try_next
- $sd->{failed_nodes},
- $sd->{maintenance_node},
- 1, # best_score
+ $sd,
+ 'best-score',
);
if ($recovery_node) {
diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl
index 2478b2b..90f5cf4 100755
--- a/src/test/test_failover1.pl
+++ b/src/test/test_failover1.pl
@@ -25,32 +25,25 @@ my $service_conf = {
};
my $sd = {
+ node => $service_conf->{node},
failed_nodes => undef,
maintenance_node => undef,
};
-my $current_node = $service_conf->{node};
-
sub test {
my ($expected_node, $try_next) = @_;
+ my $select_mode = $try_next ? 'try-next' : 'none';
+
my $node = PVE::HA::Manager::select_service_node(
- $groups,
- $online_node_usage,
- "vm:111",
- $service_conf,
- $current_node,
- $try_next,
- $sd->{failed_nodes},
- $sd->{maintenance_node},
- 0, # best_score
+ $groups, $online_node_usage, "vm:111", $service_conf, $sd, $select_mode,
);
my (undef, undef, $line) = caller();
die "unexpected result: $node != ${expected_node} at line $line\n"
if $node ne $expected_node;
- $current_node = $node;
+ $sd->{node} = $node;
}
test('node1');
--
2.39.5
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-06-20 14:31 UTC|newest]
Thread overview: 70+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-20 14:31 [pve-devel] [RFC common/cluster/ha-manager/docs/manager v2 00/40] HA colocation rules Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH common v2 1/1] introduce HashTools module Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH cluster v2 1/3] cfs: add 'ha/rules.cfg' to observed files Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH cluster v2 2/3] datacenter config: make pve-ha-shutdown-policy optional Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH cluster v2 3/3] datacenter config: introduce feature flag for location rules Daniel Kral
2025-06-23 15:58 ` Thomas Lamprecht
2025-06-24 7:29 ` Daniel Kral
2025-06-24 7:51 ` Thomas Lamprecht
2025-06-24 8:19 ` Daniel Kral
2025-06-24 8:25 ` Thomas Lamprecht
2025-06-24 8:52 ` Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 01/26] tree-wide: make arguments for select_service_node explicit Daniel Kral
2025-06-20 14:31 ` Daniel Kral [this message]
2025-06-23 16:21 ` [pve-devel] [PATCH ha-manager v2 02/26] manager: improve signature of select_service_node Thomas Lamprecht
2025-06-24 8:06 ` Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 03/26] introduce rules base plugin Daniel Kral
2025-07-04 14:18 ` Michael Köppl
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 04/26] rules: introduce location rule plugin Daniel Kral
2025-06-20 16:17 ` Jillian Morgan
2025-06-20 16:30 ` Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 05/26] rules: introduce colocation " Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 06/26] rules: add global checks between location and colocation rules Daniel Kral
2025-07-01 11:02 ` Daniel Kral
2025-07-04 14:43 ` Michael Köppl
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 07/26] config, env, hw: add rules read and parse methods Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 08/26] manager: read and update rules config Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 09/26] test: ha tester: add test cases for future location rules Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 10/26] resources: introduce failback property in service config Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 11/26] manager: migrate ha groups to location rules in-memory Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 12/26] manager: apply location rules when selecting service nodes Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 13/26] usage: add information about a service's assigned nodes Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 14/26] manager: apply colocation rules when selecting service nodes Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 15/26] manager: handle migrations for colocated services Daniel Kral
2025-06-27 9:10 ` Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 16/26] sim: resources: add option to limit start and migrate tries to node Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 17/26] test: ha tester: add test cases for strict negative colocation rules Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 18/26] test: ha tester: add test cases for strict positive " Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 19/26] test: ha tester: add test cases in more complex scenarios Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 20/26] test: add test cases for rules config Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 21/26] manager: handle negative colocations with too many services Daniel Kral
2025-07-01 12:11 ` Michael Köppl
2025-07-01 12:23 ` Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 22/26] config: prune services from rules if services are deleted from config Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 23/26] api: introduce ha rules api endpoints Daniel Kral
2025-07-04 14:16 ` Michael Köppl
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 24/26] cli: expose ha rules api endpoints to ha-manager cli Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 25/26] api: groups, services: assert use-location-rules feature flag Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH ha-manager v2 26/26] api: services: check for colocations for service motions Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH docs v2 1/5] ha: config: add section about ha rules Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH docs v2 2/5] update static files to include ha rules api endpoints Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH docs v2 3/5] update static files to include use-location-rules feature flag Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH docs v2 4/5] update static files to include ha resources failback flag Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH docs v2 5/5] update static files to include ha service motion return value schema Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH manager v2 1/5] api: ha: add ha rules api endpoints Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH manager v2 2/5] ui: add use-location-rules feature flag Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH manager v2 3/5] ui: ha: hide ha groups if use-location-rules is enabled Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH manager v2 4/5] ui: ha: adapt resources components " Daniel Kral
2025-06-20 14:31 ` [pve-devel] [PATCH manager v2 5/5] ui: ha: add ha rules components and menu entry Daniel Kral
2025-06-30 15:09 ` Michael Köppl
2025-07-01 14:38 ` Michael Köppl
2025-06-20 15:43 ` [pve-devel] [RFC common/cluster/ha-manager/docs/manager v2 00/40] HA colocation rules Daniel Kral
2025-06-20 17:11 ` Jillian Morgan
2025-06-20 17:45 ` DERUMIER, Alexandre via pve-devel
[not found] ` <476c41123dced9d560dfbf27640ef8705fd90f11.camel@groupe-cyllene.com>
2025-06-23 15:36 ` Thomas Lamprecht
2025-06-24 8:48 ` Daniel Kral
2025-06-27 12:23 ` Friedrich Weber
2025-06-27 12:41 ` Daniel Kral
2025-06-23 8:11 ` DERUMIER, Alexandre via pve-devel
[not found] ` <bf973ec4e8c52a10535ed35ad64bf0ec8d1ad37d.camel@groupe-cyllene.com>
2025-06-23 15:28 ` Thomas Lamprecht
2025-06-23 23:21 ` DERUMIER, Alexandre via pve-devel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250620143148.218469-7-d.kral@proxmox.com \
--to=d.kral@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal