From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 305301FF146 for ; Tue, 28 Apr 2026 04:46:04 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id C6C5121F5; Tue, 28 Apr 2026 04:45:55 +0200 (CEST) From: Kefu Chai To: pve-devel@lists.proxmox.com Subject: [PATCH manager 1/5] pve8to9: extract ceph checks into PVE::Ceph::UpgradeCheck Date: Tue, 28 Apr 2026 10:45:34 +0800 Message-ID: <20260428024538.3559017-2-k.chai@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260428024538.3559017-1-k.chai@proxmox.com> References: <20260428024538.3559017-1-k.chai@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1777344251910 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.264 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment PROLO_LEO1 0.1 Meta Catches all Leo drug variations so far SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[releases.pm,tools.pm,pve8to9.pm,upgradecheck.pm,proxmox.com,services.pm] Message-ID-Hash: BMWULE4MUYXNEMONK22TEYSHRUNOSSWV X-Message-ID-Hash: BMWULE4MUYXNEMONK22TEYSHRUNOSSWV X-MailFrom: k.chai@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Move the body of check_ceph() into a new PVE::Ceph::UpgradeCheck module. The module exposes run_checks() which returns an arrayref of { level, msg } records, and each caller formats the records with its own log_* helpers. This matches the idiomatic PVE pattern where modules return data and callers handle presentation. Prepares the ground for adding more ceph upgrade checks and for exposing the same checks via a standalone 'pveceph upgrade-check' subcommand in a follow-up. No behaviour change: pve8to9 emits the same messages, in the same order, through the same log_* helpers. 
Signed-off-by: Kefu Chai --- PVE/CLI/pve8to9.pm | 203 +++-------------------- PVE/Ceph/Makefile | 1 + PVE/Ceph/UpgradeCheck.pm | 342 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 363 insertions(+), 183 deletions(-) create mode 100644 PVE/Ceph/UpgradeCheck.pm diff --git a/PVE/CLI/pve8to9.pm b/PVE/CLI/pve8to9.pm index afc4785e..06dde101 100644 --- a/PVE/CLI/pve8to9.pm +++ b/PVE/CLI/pve8to9.pm @@ -14,6 +14,7 @@ use PVE::API2::Cluster::Ceph; use PVE::AccessControl; use PVE::Ceph::Tools; +use PVE::Ceph::UpgradeCheck; use PVE::Cluster; use PVE::Corosync; use PVE::INotify; @@ -61,21 +62,6 @@ my $older_suites = { my ($min_pve_major, $min_pve_minor, $min_pve_pkgrel) = (8, 4, 0); -my $ceph_release2code = { - '12' => 'Luminous', - '13' => 'Mimic', - '14' => 'Nautilus', - '15' => 'Octopus', - '16' => 'Pacific', - '17' => 'Quincy', - '18' => 'Reef', - '19' => 'Squid', - '20' => 'Tentacle', -}; -my $ceph_supported_release = 19; # the version we support for upgrading (i.e., available on both) -my $ceph_supported_code_name = $ceph_release2code->{"$ceph_supported_release"} - or die "inconsistent source code, could not map expected ceph version to code name!"; - my $forced_legacy_cgroup = 0; my $counters = { @@ -588,180 +574,31 @@ sub check_cluster_corosync { } } -sub check_ceph { - print_header("CHECKING HYPER-CONVERGED CEPH STATUS"); - - if (PVE::Ceph::Tools::check_ceph_inited(1)) { - log_info("hyper-converged ceph setup detected!"); - } else { - log_skip("no hyper-converged ceph setup detected!"); - return; - } - - log_info("getting Ceph status/health information.."); - my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); }; - my $noout = eval { PVE::API2::Cluster::Ceph->get_flag({ flag => "noout" }); }; - if ($@) { - log_fail("failed to get 'noout' flag status - $@"); - } - - my $noout_wanted = 1; - - if (!$ceph_status || !$ceph_status->{health}) { - log_fail("unable to determine Ceph status!"); - } else { - my $ceph_health = 
$ceph_status->{health}->{status}; - if (!$ceph_health) { - log_fail("unable to determine Ceph health!"); - } elsif ($ceph_health eq 'HEALTH_OK') { - log_pass("Ceph health reported as 'HEALTH_OK'."); - } elsif ( - $ceph_health eq 'HEALTH_WARN' - && $noout - && (keys %{ $ceph_status->{health}->{checks} } == 1) - ) { - log_pass( - "Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set." - ); - } else { - log_warn( - "Ceph health reported as '$ceph_health'.\n Use the PVE dashboard or 'ceph -s'" - . " to determine the specific issues and try to resolve them."); - } - } - - # TODO: check OSD min-required version, if to low it breaks stuff! - - log_info("checking local Ceph version.."); - if (my $release = eval { PVE::Ceph::Tools::get_local_version(1) }) { - my $code_name = $ceph_release2code->{"$release"} || 'unknown'; - if ($release == $ceph_supported_release) { - log_pass( - "found expected Ceph $ceph_supported_release $ceph_supported_code_name release."); - } elsif ($release > $ceph_supported_release) { - log_warn( - "found newer Ceph release $release $code_name as the expected $ceph_supported_release" - . " $ceph_supported_code_name, installed third party repos?!"); - } else { - log_fail("Hyper-converged Ceph $release $code_name is to old for upgrade!\n" - . " Upgrade Ceph first to $ceph_supported_code_name following our how-to:\n" - . 
" "); - } - } else { - log_fail("unable to determine local Ceph version!"); - } - - log_info("getting Ceph daemon versions.."); - my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); }; - if (!$ceph_versions) { - log_fail("unable to determine Ceph daemon versions!"); - } else { - my $services = [ - { 'key' => 'mon', 'name' => 'monitor' }, - { 'key' => 'mgr', 'name' => 'manager' }, - { 'key' => 'mds', 'name' => 'MDS' }, - { 'key' => 'osd', 'name' => 'OSD' }, - ]; - - my $ceph_versions_simple = {}; - my $ceph_versions_commits = {}; - for my $type (keys %$ceph_versions) { - for my $full_version (keys $ceph_versions->{$type}->%*) { - if ($full_version =~ m/^(.*) \((.*)\).*\(.*\)$/) { - # String is in the form of - # ceph version 17.2.6 (810db68029296377607028a6c6da1ec06f5a2b27) quincy (stable) - # only check the first part, e.g. 'ceph version 17.2.6', the commit hash can - # be different - $ceph_versions_simple->{$type}->{$1} = 1; - $ceph_versions_commits->{$type}->{$2} = 1; - } - } - } - - for my $service (@$services) { - my ($name, $key) = $service->@{ 'name', 'key' }; - if (my $service_versions = $ceph_versions_simple->{$key}) { - if (keys %$service_versions == 0) { - log_skip("no running instances detected for daemon type $name."); - } elsif (keys %$service_versions == 1) { - log_pass("single running version detected for daemon type $name."); - } else { - log_warn("multiple running versions detected for daemon type $name!"); - } - } else { - log_skip("unable to determine versions of running Ceph $name instances."); - } - my $service_commits = $ceph_versions_commits->{$key}; - log_info( - "different builds of same version detected for an $name. Are you in the middle of the upgrade?" 
- ) if $service_commits && keys %$service_commits > 1; - } +sub log_ceph_upgrade_message { + my ($message) = @_; - my $overall_versions = $ceph_versions->{overall}; - if (!$overall_versions) { - log_warn("unable to determine overall Ceph daemon versions!"); - } elsif (keys %$overall_versions == 1) { - log_pass("single running overall version detected for all Ceph daemon types."); - $noout_wanted = !$upgraded; # off post-upgrade, on pre-upgrade - } elsif (keys $ceph_versions_simple->{overall}->%* != 1) { - log_warn( - "overall version mismatch detected, check 'ceph versions' output for details!"); - } - } - - if ($noout) { - if ($noout_wanted) { - log_pass("'noout' flag set to prevent rebalancing during cluster-wide upgrades."); - } else { - log_warn("'noout' flag set, Ceph cluster upgrade seems finished."); - } - } elsif ($noout_wanted) { - log_warn("'noout' flag not set - recommended to prevent rebalancing during upgrades."); - } + my ($level, $msg) = $message->@{qw(level msg)}; - log_info("checking Ceph config.."); - my $conf = PVE::Cluster::cfs_read_file('ceph.conf'); - if (%$conf) { - my $global = $conf->{global}; + return log_pass($msg) if $level eq 'pass'; + return log_info($msg) if $level eq 'info'; + return log_notice($msg) if $level eq 'notice'; + return log_warn($msg) if $level eq 'warn'; + return log_fail($msg) if $level eq 'fail'; + return log_skip($msg) if $level eq 'skip'; - my $global_monhost = $global->{mon_host} // $global->{"mon host"} // $global->{"mon-host"}; - if (!defined($global_monhost)) { - log_warn( - "No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with" - . 
" all monitor addresses (without ports) to the global section."); - } - - my $ipv6 = $global->{ms_bind_ipv6} // $global->{"ms bind ipv6"} - // $global->{"ms-bind-ipv6"}; - if ($ipv6) { - my $ipv4 = $global->{ms_bind_ipv4} // $global->{"ms bind ipv4"} - // $global->{"ms-bind-ipv4"}; - if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) { - log_warn( - "'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to" - . " disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network." - ); - } - } + return log_info($msg); +} - if (defined($global->{keyring})) { - log_warn( - "[global] config section contains 'keyring' option, which will prevent services from" - . " starting with Nautilus.\n Move 'keyring' option to [client] section instead." - ); - } +sub check_ceph { + print_header("CHECKING HYPER-CONVERGED CEPH STATUS"); - } else { - log_warn("Empty ceph config found"); - } + my $messages = PVE::Ceph::UpgradeCheck::run_checks( + nodename => $nodename, + upgraded => $upgraded, + ); - my $local_ceph_ver = PVE::Ceph::Tools::get_local_version(1); - if (defined($local_ceph_ver)) { - if ($local_ceph_ver <= 14) { - log_fail("local Ceph version too low, at least Octopus required.."); - } - } else { - log_fail("unable to determine local Ceph version."); + for my $m ($messages->@*) { + log_ceph_upgrade_message($m); } } diff --git a/PVE/Ceph/Makefile b/PVE/Ceph/Makefile index 2901ebe5..b64912bb 100644 --- a/PVE/Ceph/Makefile +++ b/PVE/Ceph/Makefile @@ -4,6 +4,7 @@ PERLSOURCE = \ Releases.pm \ Services.pm \ Tools.pm \ + UpgradeCheck.pm \ all: diff --git a/PVE/Ceph/UpgradeCheck.pm b/PVE/Ceph/UpgradeCheck.pm new file mode 100644 index 00000000..6998caf2 --- /dev/null +++ b/PVE/Ceph/UpgradeCheck.pm @@ -0,0 +1,342 @@ +package PVE::Ceph::UpgradeCheck; + +# Produces advisory messages about a Ceph cluster's upgrade-readiness. 
+# +# Callers (PVE::CLI::pve8to9, 'pveceph upgrade-check') invoke run_checks() +# and format the returned records with their own log_* helpers. +# +# Each record is a hashref of the form: +# { level => 'pass'|'info'|'notice'|'warn'|'fail'|'skip', msg => 'text' } + +use strict; +use warnings; + +use PVE::API2::Ceph; +use PVE::API2::Cluster::Ceph; +use PVE::Ceph::Tools; +use PVE::Cluster; + +my $ceph_release2code = { + '12' => 'Luminous', + '13' => 'Mimic', + '14' => 'Nautilus', + '15' => 'Octopus', + '16' => 'Pacific', + '17' => 'Quincy', + '18' => 'Reef', + '19' => 'Squid', + '20' => 'Tentacle', +}; +my $default_supported_release = 19; # available before and after the current major upgrade +my $default_supported_code_name = $ceph_release2code->{"$default_supported_release"} + or die "inconsistent source code, could not map expected ceph version to code name!"; + +sub run_checks { + my (%args) = @_; + + my $nodename = $args{nodename} + or die "run_checks: 'nodename' argument is required\n"; + my $supported_release = $args{supported_release} // $default_supported_release; + my $upgraded = $args{upgraded} // 0; + + my @messages; + + if (!PVE::Ceph::Tools::check_ceph_inited(1)) { + push @messages, { level => 'skip', msg => "no hyper-converged ceph setup detected!" }; + return \@messages; + } + push @messages, { level => 'info', msg => "hyper-converged ceph setup detected!" }; + + my ($health_msgs, $noout) = check_health($nodename); + push @messages, $health_msgs->@*; + + # TODO: check OSD min-required version, if too low it breaks stuff! 
+ + my ($version_msgs, $noout_wanted) = check_versions($supported_release, $upgraded); + push @messages, $version_msgs->@*; + + push @messages, check_noout_flag($noout, $noout_wanted)->@*; + + push @messages, check_config()->@*; + + push @messages, check_local_version_minimum()->@*; + + return \@messages; +} + +sub check_health { + my ($nodename) = @_; + + my @out; + push @out, { level => 'info', msg => "getting Ceph status/health information.." }; + + my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); }; + my $noout = eval { PVE::API2::Cluster::Ceph->get_flag({ flag => "noout" }); }; + if ($@) { + push @out, { level => 'fail', msg => "failed to get 'noout' flag status - $@" }; + } + + if (!$ceph_status || !$ceph_status->{health}) { + push @out, { level => 'fail', msg => "unable to determine Ceph status!" }; + return (\@out, $noout); + } + + my $ceph_health = $ceph_status->{health}->{status}; + if (!$ceph_health) { + push @out, { level => 'fail', msg => "unable to determine Ceph health!" }; + } elsif ($ceph_health eq 'HEALTH_OK') { + push @out, { level => 'pass', msg => "Ceph health reported as 'HEALTH_OK'." }; + } elsif ( + $ceph_health eq 'HEALTH_WARN' + && $noout + && (keys %{ $ceph_status->{health}->{checks} } == 1) + ) { + push @out, + { + level => 'pass', + msg => + "Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.", + }; + } else { + push @out, + { + level => 'warn', + msg => + "Ceph health reported as '$ceph_health'.\n Use the PVE dashboard or 'ceph -s'" + . " to determine the specific issues and try to resolve them.", + }; + } + + return (\@out, $noout); +} + +sub check_versions { + my ($supported_release, $upgraded) = @_; + + my @out; + my $noout_wanted = 1; + + my $supported_code_name = $supported_release == $default_supported_release + ? 
$default_supported_code_name + : ($ceph_release2code->{"$supported_release"} // 'unknown'); + + push @out, { level => 'info', msg => "checking local Ceph version.." }; + if (my $release = eval { PVE::Ceph::Tools::get_local_version(1) }) { + my $code_name = $ceph_release2code->{"$release"} || 'unknown'; + if ($release == $supported_release) { + push @out, + { + level => 'pass', + msg => "found expected Ceph $supported_release $supported_code_name release.", + }; + } elsif ($release > $supported_release) { + push @out, + { + level => 'warn', + msg => "found newer Ceph release $release $code_name as the expected" + . " $supported_release $supported_code_name, installed third party repos?!", + }; + } else { + push @out, + { + level => 'fail', + msg => "Hyper-converged Ceph $release $code_name is to old for upgrade!\n" + . " Upgrade Ceph first to $supported_code_name following our how-to:\n" + . " ", + }; + } + } else { + push @out, { level => 'fail', msg => "unable to determine local Ceph version!" }; + } + + push @out, { level => 'info', msg => "getting Ceph daemon versions.." }; + my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); }; + if (!$ceph_versions) { + push @out, { level => 'fail', msg => "unable to determine Ceph daemon versions!" }; + return (\@out, $noout_wanted); + } + + my $services = [ + { 'key' => 'mon', 'name' => 'monitor' }, + { 'key' => 'mgr', 'name' => 'manager' }, + { 'key' => 'mds', 'name' => 'MDS' }, + { 'key' => 'osd', 'name' => 'OSD' }, + ]; + + my $ceph_versions_simple = {}; + my $ceph_versions_commits = {}; + for my $type (keys %$ceph_versions) { + for my $full_version (keys $ceph_versions->{$type}->%*) { + if ($full_version =~ m/^(.*) \((.*)\).*\(.*\)$/) { + # String is in the form of + # ceph version 17.2.6 (810db68029296377607028a6c6da1ec06f5a2b27) quincy (stable) + # only check the first part, e.g. 
'ceph version 17.2.6', the commit hash can + # be different + $ceph_versions_simple->{$type}->{$1} = 1; + $ceph_versions_commits->{$type}->{$2} = 1; + } + } + } + + for my $service (@$services) { + my ($name, $key) = $service->@{ 'name', 'key' }; + if (my $service_versions = $ceph_versions_simple->{$key}) { + if (keys %$service_versions == 0) { + push @out, + { + level => 'skip', + msg => "no running instances detected for daemon type $name.", + }; + } elsif (keys %$service_versions == 1) { + push @out, + { + level => 'pass', + msg => "single running version detected for daemon type $name.", + }; + } else { + push @out, + { + level => 'warn', + msg => "multiple running versions detected for daemon type $name!", + }; + } + } else { + push @out, + { + level => 'skip', + msg => "unable to determine versions of running Ceph $name instances.", + }; + } + my $service_commits = $ceph_versions_commits->{$key}; + if ($service_commits && keys %$service_commits > 1) { + push @out, + { + level => 'info', + msg => + "different builds of same version detected for an $name. Are you in the middle of the upgrade?", + }; + } + } + + my $overall_versions = $ceph_versions->{overall}; + if (!$overall_versions) { + push @out, { level => 'warn', msg => "unable to determine overall Ceph daemon versions!" 
}; + } elsif (keys %$overall_versions == 1) { + push @out, + { + level => 'pass', + msg => "single running overall version detected for all Ceph daemon types.", + }; + $noout_wanted = !$upgraded; # off post-upgrade, on pre-upgrade + } elsif (keys $ceph_versions_simple->{overall}->%* != 1) { + push @out, + { + level => 'warn', + msg => + "overall version mismatch detected, check 'ceph versions' output for details!", + }; + } + + return (\@out, $noout_wanted); +} + +sub check_noout_flag { + my ($noout, $noout_wanted) = @_; + + my @out; + if ($noout) { + if ($noout_wanted) { + push @out, + { + level => 'pass', + msg => "'noout' flag set to prevent rebalancing during cluster-wide upgrades.", + }; + } else { + push @out, + { + level => 'warn', + msg => "'noout' flag set, Ceph cluster upgrade seems finished.", + }; + } + } elsif ($noout_wanted) { + push @out, + { + level => 'warn', + msg => "'noout' flag not set - recommended to prevent rebalancing during upgrades.", + }; + } + + return \@out; +} + +sub check_config { + my @out; + + push @out, { level => 'info', msg => "checking Ceph config.." }; + my $conf = PVE::Cluster::cfs_read_file('ceph.conf'); + if (!%$conf) { + push @out, { level => 'warn', msg => "Empty ceph config found" }; + return \@out; + } + + my $global = $conf->{global}; + + my $global_monhost = $global->{mon_host} // $global->{"mon host"} // $global->{"mon-host"}; + if (!defined($global_monhost)) { + push @out, + { + level => 'warn', + msg => + "No 'mon_host' entry found in ceph config.\n It's recommended to add mon_host with" + . 
" all monitor addresses (without ports) to the global section.", + }; + } + + my $ipv6 = $global->{ms_bind_ipv6} // $global->{"ms bind ipv6"} // $global->{"ms-bind-ipv6"}; + if ($ipv6) { + my $ipv4 = $global->{ms_bind_ipv4} // $global->{"ms bind ipv4"} + // $global->{"ms-bind-ipv4"}; + if ($ipv6 eq 'true' && (!defined($ipv4) || $ipv4 ne 'false')) { + push @out, + { + level => 'warn', + msg => + "'ms_bind_ipv6' is enabled but 'ms_bind_ipv4' is not disabled.\n Make sure to" + . " disable 'ms_bind_ipv4' for ipv6 only clusters, or add an ipv4 network to public/cluster network.", + }; + } + } + + if (defined($global->{keyring})) { + push @out, + { + level => 'warn', + msg => + "[global] config section contains 'keyring' option, which will prevent services from" + . " starting with Nautilus.\n Move 'keyring' option to [client] section instead.", + }; + } + + return \@out; +} + +sub check_local_version_minimum { + my @out; + + my $local_ceph_ver = PVE::Ceph::Tools::get_local_version(1); + if (defined($local_ceph_ver)) { + if ($local_ceph_ver <= 14) { + push @out, + { + level => 'fail', + msg => "local Ceph version too low, at least Octopus required..", + }; + } + } else { + push @out, { level => 'fail', msg => "unable to determine local Ceph version." }; + } + + return \@out; +} + +1; -- 2.47.3