From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id A22531FF14C for ; Fri, 26 Jun 2026 14:10:58 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4A858F39B; Fri, 26 Jun 2026 14:10:51 +0200 (CEST) From: Thomas Lamprecht To: pve-devel@lists.proxmox.com Subject: [PATCH storage 06/13] multipath: broadcast per-node map health to the cluster KV store Date: Fri, 26 Jun 2026 14:07:36 +0200 Message-ID: <20260626121000.2095591-7-t.lamprecht@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260626121000.2095591-1-t.lamprecht@proxmox.com> References: <20260626121000.2095591-1-t.lamprecht@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1782475801604 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.005 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: 5XMFFH6XF6O6BKK7WVQDIJ673DSIQZIV X-Message-ID-Hash: 5XMFFH6XF6O6BKK7WVQDIJ673DSIQZIV X-MailFrom: t.lamprecht@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Map health is inherently per-node: each node has its own paths to the same LUN, so whether a LUN has full path redundancy can only be told per node. To make a cluster-wide view possible, reduce the local maps to a small per-WWID summary and push it under the cluster KV key 'multipath' via pmxcfs. A present value also signals that the node is actively multipathing: clear the key when no maps are assembled, so the status aggregation can combine just the active nodes without extra bookkeeping. The summary stays well under the 32 KiB KV limit; the full per-path detail stays behind the per-node disks/multipath API. Signed-off-by: Thomas Lamprecht --- src/PVE/Multipath.pm | 56 ++++++++++++++++++++++++++++++++- src/test/run_multipath_tests.pl | 50 +++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/PVE/Multipath.pm b/src/PVE/Multipath.pm index 59c1103..5647189 100644 --- a/src/PVE/Multipath.pm +++ b/src/PVE/Multipath.pm @@ -3,7 +3,7 @@ package PVE::Multipath; use strict; use warnings; -use JSON qw(decode_json); +use JSON qw(decode_json encode_json); use PVE::Tools qw(run_command file_read_firstline file_get_contents); @@ -279,4 +279,58 @@ sub reconfigure { run_command([$MULTIPATHD, 'reconfigure']); } +# Pure: reduce the rich get_maps() output to the compact per-WWID dict broadcast under the cluster +# KV key 'multipath'. Holds only what the cluster-wide health matrix needs; the full per-path detail +# stays available behind the per-node disks/multipath API. +sub summarize_maps_for_broadcast { + my ($maps) = @_; + + my $out = {}; + for my $map ($maps->@*) { + next if !defined($map->{wwid}); + $out->{ $map->{wwid} } = { + state => $map->{health}, + 'paths-active' => $map->{'paths-active'} // 0, + 'paths-total' => $map->{'paths-total'} // 0, + defined($map->{transport}) ? (transport => $map->{transport}) : (), + defined($map->{size}) ? (size => $map->{size}) : (), + }; + } + return $out; +} + +# Push a compact per-WWID health snapshot into the cluster KV store under the key 'multipath'. A +# present value also means "this node is actively multipathing", so clear the key when no maps are +# assembled and the status aggregation then only combines the active nodes. Never throws, so it is +# safe to call from a status loop where multipath is not the primary concern. +sub broadcast_health { + require PVE::Cluster; + + my $clear = sub { + eval { PVE::Cluster::broadcast_node_kv('multipath', undef) }; + warn "multipath: clearing health broadcast failed - $@" if $@; + }; + + if (!is_running()) { + $clear->(); + return; + } + + my $maps = eval { get_maps() }; + if (my $err = $@) { + warn "multipath: collecting maps for broadcast failed - $err"; + return; + } + + my $summary = summarize_maps_for_broadcast($maps); + if (!%$summary) { + $clear->(); + return; + } + + my $json = encode_json($summary); + eval { PVE::Cluster::broadcast_node_kv('multipath', $json) }; + warn "multipath: health broadcast failed - $@" if $@; +} + 1; diff --git a/src/test/run_multipath_tests.pl b/src/test/run_multipath_tests.pl index f710308..affec23 100755 --- a/src/test/run_multipath_tests.pl +++ b/src/test/run_multipath_tests.pl @@ -235,4 +235,54 @@ is( 'the overrides writer trims trailing whitespace', ); +# --- broadcast summary (per-WWID condensation of get_maps for the cluster KV) --- +my $summary = PVE::Multipath::summarize_maps_for_broadcast($maps); +is_deeply( + [sort keys %$summary], + [sort map { $_->{wwid} } $maps->@*], + 'every map with a WWID appears in the summary', +); +is($summary->{ $a->{wwid} }->{state}, 'optimal', 'optimal map summarized as optimal'); +is($summary->{ $a->{wwid} }->{'paths-active'}, 2, 'optimal map active path count carried'); +is($summary->{ $a->{wwid} }->{'paths-total'}, 2, 'optimal map total path count carried'); +is($summary->{ $b->{wwid} }->{state}, 'degraded', 'degraded map summarized as degraded'); +is($summary->{ $c->{wwid} }->{state}, 'failed', 'failed map summarized as failed'); +ok( + !exists $summary->{ $a->{wwid} }->{transport}, + 'transport omitted when not derived (get_maps fills it live)', +); + +is_deeply( + PVE::Multipath::summarize_maps_for_broadcast([]), + {}, + 'empty maps list summarizes to empty hash (caller clears the KV)', +); + +# transport/size propagate when the caller (get_maps) has set them +my $enriched = [{ + wwid => '3600x', + health => 'optimal', + 'paths-active' => 2, + 'paths-total' => 2, + transport => 'iscsi', + size => 34359738368, +}]; +my $enr = PVE::Multipath::summarize_maps_for_broadcast($enriched); +is($enr->{'3600x'}->{transport}, 'iscsi', 'transport carried into the summary'); +is($enr->{'3600x'}->{size}, 34359738368, 'size carried into the summary'); + +# size budget: well under the 32 KiB pmxcfs KV limit even for many maps +my $many = [ + map { { + wwid => sprintf('3600140500000000000000000000%04x', $_), + health => 'optimal', + 'paths-active' => 4, + 'paths-total' => 4, + transport => 'iscsi', + size => 1099511627776, + } } 0 .. 99 +]; +my $big = JSON::encode_json(PVE::Multipath::summarize_maps_for_broadcast($many)); +ok(length($big) < 32 * 1024, "100-map summary (" . length($big) . " B) fits the KV size limit"); + done_testing(); -- 2.47.3