public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Fiona Ebner <f.ebner@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH v4 qemu-server 7/9] fix #5440: vzdump: better cleanup fleecing images after hard errors
Date: Mon, 11 Nov 2024 14:54:34 +0100	[thread overview]
Message-ID: <20241111135436.82773-8-f.ebner@proxmox.com> (raw)
In-Reply-To: <20241111135436.82773-1-f.ebner@proxmox.com>

By recording the allocated fleecing images in the VM config, they
are not immediately orphaned, should a hard error occur during
backup that prevents cleanup.

An attempt to clean them up is made during the next backup run.

Suggested-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---

Changes in v4:
* detach left-over fleecing images from a running VM before attempting
  cleanup
* more logging

 PVE/QemuConfig.pm        | 68 ++++++++++++++++++++++++++++++++++++++++
 PVE/VZDump/QemuServer.pm | 36 ++++++++++++++++-----
 2 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 8e8a7828..3084f831 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -13,6 +13,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer;
 use PVE::QemuServer::Machine;
 use PVE::QemuServer::Memory qw(get_current_memory);
+use PVE::RESTEnvironment qw(log_warn);
 use PVE::Storage;
 use PVE::Tools;
 use PVE::Format qw(render_bytes render_duration);
@@ -573,4 +574,71 @@ sub has_cloudinit {
     return $found;
 }
 
+# Caller is expected to deal with volumes from an already existing 'fleecing-images' entry in the
+# configuration first.
+sub record_fleecing_images {
+    my ($vmid, $volids) = @_;
+
+    return if scalar($volids->@*) == 0;
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	$conf->{'fleecing-images'} = join(',', $volids->@*);
+	PVE::QemuConfig->write_config($vmid, $conf);
+    });
+}
+
+sub cleanup_fleecing_images {
+    my ($vmid, $storecfg, $log_func) = @_;
+
+    if (!$log_func) {
+	$log_func = sub {
+	    my ($level, $line) = @_;
+	    chomp($line);
+	    if ($level eq 'info') {
+		print "$line\n";
+	    } else {
+		log_warn($line);
+	    }
+	};
+    }
+
+    my $volids = [];
+    my $failed = [];
+
+    # detach any left-overs from a running VM
+    if (PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+	my $block_info = mon_cmd($vmid, "query-block");
+	for my $info ($block_info->@*) {
+	    my $device_id = $info->{device};
+	    next if $device_id !~ m/-fleecing$/;
+
+	    $log_func->('info', "detaching (old) fleecing image for '$device_id'");
+	    $device_id =~ s/^drive-//; # re-added by qemu_drivedel()
+	    eval { PVE::QemuServer::qemu_drivedel($vmid, $device_id) };
+	    $log_func->('warn', "error detaching (old) fleecing image '$device_id' - $@") if $@;
+	}
+    }
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	if ($conf->{'fleecing-images'}) {
+	    $volids = [PVE::Tools::split_list($conf->{'fleecing-images'})];
+	    delete $conf->{'fleecing-images'};
+	    PVE::QemuConfig->write_config($vmid, $conf);
+	}
+    });
+
+    for my $volid ($volids->@*) {
+	$log_func->('info', "removing (old) fleecing image '$volid'");
+	eval { PVE::Storage::vdisk_free($storecfg, $volid); };
+	if (my $err = $@) {
+	    $log_func->('warn', "error removing fleecing image '$volid' - $err");
+	    push $failed->@*, $volid;
+	}
+    }
+
+    record_fleecing_images($vmid, $failed);
+}
+
 1;
diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm
index 17f63568..240e1e95 100644
--- a/PVE/VZDump/QemuServer.pm
+++ b/PVE/VZDump/QemuServer.pm
@@ -530,15 +530,25 @@ sub get_and_check_pbs_encryption_config {
     die "internal error - unhandled case for getting & checking PBS encryption ($keyfile, $master_keyfile)!";
 }
 
+# Helper is intended to be called from allocate_fleecing_images() only. Otherwise, fleecing volids
+# have already been recorded in the configuration and PVE::QemuConfig::cleanup_fleecing_images()
+# should be used instead.
 my sub cleanup_fleecing_images {
-    my ($self, $disks) = @_;
+    my ($self, $vmid, $disks) = @_;
+
+    my $failed = [];
 
     for my $di ($disks->@*) {
 	if (my $volid = $di->{'fleece-volid'}) {
 	    eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
-	    $self->log('warn', "error removing fleecing image '$volid' - $@") if $@;
+	    if (my $err = $@) {
+		$self->log('warn', "error removing fleecing image '$volid' - $err");
+		push $failed->@*, $volid;
+	    }
 	}
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $failed);
 }
 
 my sub allocate_fleecing_images {
@@ -546,8 +556,7 @@ my sub allocate_fleecing_images {
 
     die "internal error - no fleecing storage specified\n" if !$fleecing_storeid;
 
-    # TODO what about potential left-over images from a failed attempt? Just
-    # auto-remove? While unlikely, could conflict with manually created image from user...
+    my $fleece_volids = [];
 
     eval {
 	my $n = 0; # counter for fleecing image names
@@ -564,6 +573,8 @@ my sub allocate_fleecing_images {
 		$di->{'fleece-volid'} = PVE::Storage::vdisk_alloc(
 		    $self->{storecfg}, $fleecing_storeid, $vmid, $format, $name, $size);
 
+		push $fleece_volids->@*, $di->{'fleece-volid'};
+
 		$n++;
 	    } else {
 		die "implement me (type '$di->{type}')";
@@ -571,9 +582,11 @@ my sub allocate_fleecing_images {
 	}
     };
     if (my $err = $@) {
-	cleanup_fleecing_images($self, $disks);
+	cleanup_fleecing_images($self, $vmid, $disks);
 	die $err;
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $fleece_volids);
 }
 
 my sub detach_fleecing_images {
@@ -633,6 +646,13 @@ my sub check_and_prepare_fleecing {
 	$use_fleecing = 0;
     }
 
+    # clean up potential left-overs from a previous attempt
+    eval {
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
+    };
+    $self->log('warn', "attempt to clean up left-over fleecing images failed - $@") if $@;
+
     if ($use_fleecing) {
 	my ($default_format, $valid_formats) = PVE::Storage::storage_default_format(
 	    $self->{storecfg}, $fleecing_opts->{storage});
@@ -794,7 +814,8 @@ sub archive_pbs {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     die $err if $err;
@@ -994,7 +1015,8 @@ sub archive_vma {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     if ($err) {
-- 
2.39.5



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

  parent reply	other threads:[~2024-11-11 13:55 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-11 13:54 [pve-devel] [PATCH-SERIES v4 docs/qemu-server] more robust handling of fleecing images Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 docs 1/9] configuration files: add general section Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 docs 2/9] cli appendix: reference section about casing style Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 3/9] backup: prepare: factor out getting running status Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 4/9] backup: prepare: cancel previous job if still running Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 5/9] parse config: allow config keys with minus sign Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 6/9] schema: add fleecing-images config property Fiona Ebner
2024-11-11 13:54 ` Fiona Ebner [this message]
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 8/9] migration: attempt to clean up potential left-over fleecing images Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 9/9] destroy vm: " Fiona Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241111135436.82773-8-f.ebner@proxmox.com \
    --to=f.ebner@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal