From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <a.lauterer@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 50D51E266
 for <pve-devel@lists.proxmox.com>; Tue, 18 Jul 2023 09:27:03 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 2C60016A20
 for <pve-devel@lists.proxmox.com>; Tue, 18 Jul 2023 09:26:33 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Tue, 18 Jul 2023 09:26:32 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 5246142F8C
 for <pve-devel@lists.proxmox.com>; Tue, 18 Jul 2023 09:26:32 +0200 (CEST)
Message-ID: <9d8f50b4-ff4a-dbe8-3234-ca7896f6c37d@proxmox.com>
Date: Tue, 18 Jul 2023 09:26:31 +0200
MIME-Version: 1.0
User-Agent: Mozilla Thunderbird
Content-Language: en-US
To: pve-devel@lists.proxmox.com
References: <20230614111022.1432946-1-a.lauterer@proxmox.com>
From: Aaron Lauterer <a.lauterer@proxmox.com>
In-Reply-To: <20230614111022.1432946-1-a.lauterer@proxmox.com>
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL -0.084 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [rbdplugin.pm]
Subject: Re: [pve-devel] [PATCH v2 storage 1/2] rbd: improve handling of
 missing images
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Tue, 18 Jul 2023 07:27:03 -0000

ping?

On 6/14/23 13:10, Aaron Lauterer wrote:
> It can happen that an RBD image isn't cleaned up 100%. Calling 'rbd ls
> -l' will then show errors that it is not possible to open the image in
> question:
> ```
> rbd: error opening vm-103-disk-1: (2) No such file or directory
> rbd: listing images failed: (2) No such file or directory
> ```
> 
> Originally we only showed the last error line, which is too generic and
> doesn't give a good hint as to what is actually wrong.
> 
> We can improve that by catching these specific errors and adding the
> problematic disk images to the returned list with a size of '-1'.
> 
> When the 'rbd rm' command is used on such an image, it will clean up
> whatever is still left.
> But for that to work, we also need to handle these errors in the
> 'rbd_ls_snap' sub as it is called from 'free_image'.
> 
> Signed-off-by: Aaron Lauterer <a.lauterer@proxmox.com>
> ---
> no changes since v1
> 
>   src/PVE/Storage/RBDPlugin.pm | 52 +++++++++++++++++++++++++++++++-----
>   1 file changed, 46 insertions(+), 6 deletions(-)
> 
> diff --git a/src/PVE/Storage/RBDPlugin.pm b/src/PVE/Storage/RBDPlugin.pm
> index f45ad3f..c4e4467 100644
> --- a/src/PVE/Storage/RBDPlugin.pm
> +++ b/src/PVE/Storage/RBDPlugin.pm
> @@ -169,6 +169,8 @@ my $krbd_feature_update = sub {
>       }
>   };
>   
> +my $missing_image_err_regex = '((?:vm|base)-\d+-.*): \(2\) No such file or directory$';
> +
>   sub run_rbd_command {
>       my ($cmd, %args) = @_;
>   
> @@ -207,13 +209,28 @@ sub rbd_ls {
>       my $raw = '';
>       my $parser = sub { $raw .= shift };
>   
> +    my $show_err = 1;
> +    my $missing_images = {};
> +    my $err_parser = sub {
> +	my $line = shift;
> +	if ($line =~ m/$missing_image_err_regex/) {
> +	    $show_err = 0;
> +	    $missing_images->{$1} = 1;
> +	} elsif ($line ne "rbd: listing images failed: (2) No such file or directory") {
> +	    # this generic error is shown after the image specific "No such file..." one,
> +	    # ignore it but not other errors
> +	    $show_err = 1;
> +	    die $line;
> +	}
> +    };
> +
>       my $cmd = $rbd_cmd->($scfg, $storeid, 'ls', '-l', '--format', 'json');
>       eval {
> -	run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => $parser);
> +	run_rbd_command($cmd, errmsg => "rbd error", errfunc => $err_parser, outfunc => $parser);
>       };
>       my $err = $@;
>   
> -    die $err if $err && $err !~ m/doesn't contain rbd images/ ;
> +    die $err if $err && $show_err && $err !~ m/doesn't contain rbd images/ ;
>   
>       my $result;
>       if ($raw eq '') {
> @@ -224,6 +241,13 @@ sub rbd_ls {
>   	die "got unexpected data from rbd ls: '$raw'\n";
>       }
>   
> +    for my $image (keys %$missing_images) {
> +	push @$result, {
> +	    image => $image,
> +	    size => -1,
> +	};
> +    }
> +
>       my $list = {};
>   
>       foreach my $el (@$result) {
> @@ -251,7 +275,20 @@ sub rbd_ls_snap {
>       my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'ls', $name, '--format', 'json');
>   
>       my $raw = '';
> -    run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => sub { $raw .= shift; });
> +    my $show_err = 0;
> +    my $err_parser = sub {
> +	my $line = shift;
> +	if ($line !~ m/$missing_image_err_regex/) {
> +	    $show_err = 1;
> +	    die $line;
> +	}
> +    };
> +    eval {
> +	run_rbd_command($cmd, errmsg => "rbd error", errfunc => $err_parser, outfunc => sub { $raw .= shift; });
> +    };
> +    my $err = $@;
> +    die $err if $err && $show_err;
> +    return {} if $err && !$show_err; # could not open image, probably missing
>   
>       my $list;
>       if ($raw =~ m/^(\[.*\])$/s) { # untaint
> @@ -633,10 +670,13 @@ sub free_image {
>   
>       $class->deactivate_volume($storeid, $scfg, $volname);
>   
> -    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'purge',  $name);
> -    run_rbd_command($cmd, errmsg => "rbd snap purge '$name' error");
>   
> -    $cmd = $rbd_cmd->($scfg, $storeid, 'rm', $name);
> +    if (keys %{$snaps}) {
> +	my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'purge',  $name);
> +	run_rbd_command($cmd, errmsg => "rbd snap purge '$name' error");
> +    }
> +
> +    my $cmd = $rbd_cmd->($scfg, $storeid, 'rm', $name);
>       run_rbd_command($cmd, errmsg => "rbd rm '$name' error");
>   
>       return undef;