From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <a.lauterer@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id EE87AA13B3
 for <pve-devel@lists.proxmox.com>; Wed, 14 Jun 2023 13:10:24 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id D17891B204
 for <pve-devel@lists.proxmox.com>; Wed, 14 Jun 2023 13:10:24 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Wed, 14 Jun 2023 13:10:23 +0200 (CEST)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 78E2545552
 for <pve-devel@lists.proxmox.com>; Wed, 14 Jun 2023 13:10:23 +0200 (CEST)
From: Aaron Lauterer <a.lauterer@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Wed, 14 Jun 2023 13:10:21 +0200
Message-Id: <20230614111022.1432946-1-a.lauterer@proxmox.com>
X-Mailer: git-send-email 2.39.2
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL -0.090 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [rbdplugin.pm]
Subject: [pve-devel] [PATCH v2 storage 1/2] rbd: improve handling of missing
 images
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Wed, 14 Jun 2023 11:10:25 -0000

It can happen that an RBD image isn't cleaned up completely. Calling
'rbd ls -l' will then report errors because it cannot open the image in
question:
```
rbd: error opening vm-103-disk-1: (2) No such file or directory
rbd: listing images failed: (2) No such file or directory
```

Originally, we only showed the last error line, which is too generic
and doesn't give a good hint about what is actually wrong.

We can improve that by catching these specific errors and adding the
problematic disk images to the returned list with a size of '-1'.

When the 'rbd rm' command is used on such an image, it will clean up
whatever is still left. For that to work, however, we also need to
handle these errors in the 'rbd_ls_snap' sub, as it is called from
'free_image'.

Signed-off-by: Aaron Lauterer <a.lauterer@proxmox.com>
---
no changes since v1

 src/PVE/Storage/RBDPlugin.pm | 52 +++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 6 deletions(-)

diff --git a/src/PVE/Storage/RBDPlugin.pm b/src/PVE/Storage/RBDPlugin.pm
index f45ad3f..c4e4467 100644
--- a/src/PVE/Storage/RBDPlugin.pm
+++ b/src/PVE/Storage/RBDPlugin.pm
@@ -169,6 +169,8 @@ my $krbd_feature_update = sub {
     }
 };
 
+my $missing_image_err_regex = '((?:vm|base)-\d+-.*): \(2\) No such file or directory$';
+
 sub run_rbd_command {
     my ($cmd, %args) = @_;
 
@@ -207,13 +209,28 @@ sub rbd_ls {
     my $raw = '';
     my $parser = sub { $raw .= shift };
 
+    my $show_err = 1;
+    my $missing_images = {};
+    my $err_parser = sub {
+	my $line = shift;
+	if ($line =~ m/$missing_image_err_regex/) {
+	    $show_err = 0;
+	    $missing_images->{$1} = 1;
+	} elsif ($line ne "rbd: listing images failed: (2) No such file or directory") {
+	    # this generic error is shown after the image specific "No such file..." one,
+	    # ignore it but not other errors
+	    $show_err = 1;
+	    die $line;
+	}
+    };
+
     my $cmd = $rbd_cmd->($scfg, $storeid, 'ls', '-l', '--format', 'json');
     eval {
-	run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => $parser);
+	run_rbd_command($cmd, errmsg => "rbd error", errfunc => $err_parser, outfunc => $parser);
     };
     my $err = $@;
 
-    die $err if $err && $err !~ m/doesn't contain rbd images/ ;
+    die $err if $err && $show_err && $err !~ m/doesn't contain rbd images/ ;
 
     my $result;
     if ($raw eq '') {
@@ -224,6 +241,13 @@ sub rbd_ls {
 	die "got unexpected data from rbd ls: '$raw'\n";
     }
 
+    for my $image (keys %$missing_images) {
+	push @$result, {
+	    image => $image,
+	    size => -1,
+	};
+    }
+
     my $list = {};
 
     foreach my $el (@$result) {
@@ -251,7 +275,20 @@ sub rbd_ls_snap {
     my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'ls', $name, '--format', 'json');
 
     my $raw = '';
-    run_rbd_command($cmd, errmsg => "rbd error", errfunc => sub {}, outfunc => sub { $raw .= shift; });
+    my $show_err = 0;
+    my $err_parser = sub {
+	my $line = shift;
+	if ($line !~ m/$missing_image_err_regex/) {
+	    $show_err = 1;
+	    die $line;
+	}
+    };
+    eval {
+	run_rbd_command($cmd, errmsg => "rbd error", errfunc => $err_parser, outfunc => sub { $raw .= shift; });
+    };
+    my $err = $@;
+    die $err if $err && $show_err;
+    return {} if $err && !$show_err; # could not open image, probably missing
 
     my $list;
     if ($raw =~ m/^(\[.*\])$/s) { # untaint
@@ -633,10 +670,13 @@ sub free_image {
 
     $class->deactivate_volume($storeid, $scfg, $volname);
 
-    my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'purge',  $name);
-    run_rbd_command($cmd, errmsg => "rbd snap purge '$name' error");
 
-    $cmd = $rbd_cmd->($scfg, $storeid, 'rm', $name);
+    if (keys %{$snaps}) {
+	my $cmd = $rbd_cmd->($scfg, $storeid, 'snap', 'purge',  $name);
+	run_rbd_command($cmd, errmsg => "rbd snap purge '$name' error");
+    }
+
+    my $cmd = $rbd_cmd->($scfg, $storeid, 'rm', $name);
     run_rbd_command($cmd, errmsg => "rbd rm '$name' error");
 
     return undef;
-- 
2.39.2