all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Fiona Ebner <f.ebner@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH v4 qemu-server 7/9] fix #5440: vzdump: better cleanup fleecing images after hard errors
Date: Mon, 11 Nov 2024 14:54:34 +0100	[thread overview]
Message-ID: <20241111135436.82773-8-f.ebner@proxmox.com> (raw)
In-Reply-To: <20241111135436.82773-1-f.ebner@proxmox.com>

By recording the allocated fleecing images in the VM config, they
are not immediately orphaned, should a hard error occur during
backup that prevents cleanup.

An attempt to clean them up is made during the next backup run.

Suggested-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---

Changes in v4:
* detach left-over fleecing images from a running VM before attempting
  cleanup
* more logging

 PVE/QemuConfig.pm        | 68 ++++++++++++++++++++++++++++++++++++++++
 PVE/VZDump/QemuServer.pm | 36 ++++++++++++++++-----
 2 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 8e8a7828..3084f831 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -13,6 +13,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer;
 use PVE::QemuServer::Machine;
 use PVE::QemuServer::Memory qw(get_current_memory);
+use PVE::RESTEnvironment qw(log_warn);
 use PVE::Storage;
 use PVE::Tools;
 use PVE::Format qw(render_bytes render_duration);
@@ -573,4 +574,71 @@ sub has_cloudinit {
     return $found;
 }
 
+# Caller is expected to deal with volumes from an already existing 'fleecing-images' entry in the
+# configuration first.
+sub record_fleecing_images {
+    my ($vmid, $volids) = @_;
+
+    return if scalar($volids->@*) == 0;
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	$conf->{'fleecing-images'} = join(',', $volids->@*);
+	PVE::QemuConfig->write_config($vmid, $conf);
+    });
+}
+
+sub cleanup_fleecing_images {
+    my ($vmid, $storecfg, $log_func) = @_;
+
+    if (!$log_func) {
+	$log_func = sub {
+	    my ($level, $line) = @_;
+	    chomp($line);
+	    if ($level eq 'info') {
+		print "$line\n";
+	    } else {
+		log_warn($line);
+	    }
+	};
+    }
+
+    my $volids = [];
+    my $failed = [];
+
+    # detach any left-overs from a running VM
+    if (PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+	my $block_info = mon_cmd($vmid, "query-block");
+	for my $info ($block_info->@*) {
+	    my $device_id = $info->{device};
+	    next if $device_id !~ m/-fleecing$/;
+
+	    $log_func->('info', "detaching (old) fleecing image for '$device_id'");
+	    $device_id =~ s/^drive-//; # re-added by qemu_drivedel()
+	    eval { PVE::QemuServer::qemu_drivedel($vmid, $device_id) };
+	    $log_func->('warn', "error detaching (old) fleecing image '$device_id' - $@") if $@;
+	}
+    }
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	if ($conf->{'fleecing-images'}) {
+	    $volids = [PVE::Tools::split_list($conf->{'fleecing-images'})];
+	    delete $conf->{'fleecing-images'};
+	    PVE::QemuConfig->write_config($vmid, $conf);
+	}
+    });
+
+    for my $volid ($volids->@*) {
+	$log_func->('info', "removing (old) fleecing image '$volid'");
+	eval { PVE::Storage::vdisk_free($storecfg, $volid); };
+	if (my $err = $@) {
+	    $log_func->('warn', "error removing fleecing image '$volid' - $err");
+	    push $failed->@*, $volid;
+	}
+    }
+
+    record_fleecing_images($vmid, $failed);
+}
+
 1;
diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm
index 17f63568..240e1e95 100644
--- a/PVE/VZDump/QemuServer.pm
+++ b/PVE/VZDump/QemuServer.pm
@@ -530,15 +530,25 @@ sub get_and_check_pbs_encryption_config {
     die "internal error - unhandled case for getting & checking PBS encryption ($keyfile, $master_keyfile)!";
 }
 
+# Helper is intended to be called from allocate_fleecing_images() only. Otherwise, fleecing volids
+# have already been recorded in the configuration and PVE::QemuConfig::cleanup_fleecing_images()
+# should be used instead.
 my sub cleanup_fleecing_images {
-    my ($self, $disks) = @_;
+    my ($self, $vmid, $disks) = @_;
+
+    my $failed = [];
 
     for my $di ($disks->@*) {
 	if (my $volid = $di->{'fleece-volid'}) {
 	    eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
-	    $self->log('warn', "error removing fleecing image '$volid' - $@") if $@;
+	    if (my $err = $@) {
+		$self->log('warn', "error removing fleecing image '$volid' - $err");
+		push $failed->@*, $volid;
+	    }
 	}
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $failed);
 }
 
 my sub allocate_fleecing_images {
@@ -546,8 +556,7 @@ my sub allocate_fleecing_images {
 
     die "internal error - no fleecing storage specified\n" if !$fleecing_storeid;
 
-    # TODO what about potential left-over images from a failed attempt? Just
-    # auto-remove? While unlikely, could conflict with manually created image from user...
+    my $fleece_volids = [];
 
     eval {
 	my $n = 0; # counter for fleecing image names
@@ -564,6 +573,8 @@ my sub allocate_fleecing_images {
 		$di->{'fleece-volid'} = PVE::Storage::vdisk_alloc(
 		    $self->{storecfg}, $fleecing_storeid, $vmid, $format, $name, $size);
 
+		push $fleece_volids->@*, $di->{'fleece-volid'};
+
 		$n++;
 	    } else {
 		die "implement me (type '$di->{type}')";
@@ -571,9 +582,11 @@ my sub allocate_fleecing_images {
 	}
     };
     if (my $err = $@) {
-	cleanup_fleecing_images($self, $disks);
+	cleanup_fleecing_images($self, $vmid, $disks);
 	die $err;
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $fleece_volids);
 }
 
 my sub detach_fleecing_images {
@@ -633,6 +646,13 @@ my sub check_and_prepare_fleecing {
 	$use_fleecing = 0;
     }
 
+    # clean up potential left-overs from a previous attempt
+    eval {
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
+    };
+    $self->log('warn', "attempt to clean up left-over fleecing images failed - $@") if $@;
+
     if ($use_fleecing) {
 	my ($default_format, $valid_formats) = PVE::Storage::storage_default_format(
 	    $self->{storecfg}, $fleecing_opts->{storage});
@@ -794,7 +814,8 @@ sub archive_pbs {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     die $err if $err;
@@ -994,7 +1015,8 @@ sub archive_vma {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     if ($err) {
-- 
2.39.5



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

  parent reply	other threads:[~2024-11-11 13:55 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-11 13:54 [pve-devel] [PATCH-SERIES v4 docs/qemu-server] more robust handling of fleecing images Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 docs 1/9] configuration files: add general section Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 docs 2/9] cli appendix: reference section about casing style Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 3/9] backup: prepare: factor out getting running status Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 4/9] backup: prepare: cancel previous job if still running Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 5/9] parse config: allow config keys with minus sign Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 6/9] schema: add fleecing-images config property Fiona Ebner
2024-11-11 13:54 ` Fiona Ebner [this message]
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 8/9] migration: attempt to clean up potential left-over fleecing images Fiona Ebner
2024-11-11 13:54 ` [pve-devel] [PATCH v4 qemu-server 9/9] destroy vm: " Fiona Ebner
2024-11-17 18:43 ` [pve-devel] partially-applied: [PATCH-SERIES v4 docs/qemu-server] more robust handling of " Thomas Lamprecht
2024-11-18  8:52   ` Fiona Ebner
2024-11-18 11:03     ` Thomas Lamprecht
2024-11-18 20:31     ` Thomas Lamprecht
2024-11-19  9:00       ` Fiona Ebner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241111135436.82773-8-f.ebner@proxmox.com \
    --to=f.ebner@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal