From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id EDF28B99E6 for ; Fri, 15 Mar 2024 11:34:53 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CA8071A5DF for ; Fri, 15 Mar 2024 11:34:53 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 15 Mar 2024 11:34:52 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 46E3148A3A for ; Fri, 15 Mar 2024 11:25:10 +0100 (CET) From: Fiona Ebner To: pve-devel@lists.proxmox.com Date: Fri, 15 Mar 2024 11:24:59 +0100 Message-Id: <20240315102502.84163-19-f.ebner@proxmox.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: <20240315102502.84163-1-f.ebner@proxmox.com> References: <20240315102502.84163-1-f.ebner@proxmox.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.069 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [qemuserver.pm, qemuconfig.pm] Subject: [pve-devel] [RFC qemu-server v2 18/21] vzdump: better cleanup fleecing images after hard errors X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 15 Mar 2024 10:34:54 -0000 By recording the allocated fleecing images in the VM config, they are not immediately orphaned, should a hard error occur during backup that prevents cleanup. They are attempted to be cleaned up during the next backup run. Suggested-by: Fabian Grünbichler Signed-off-by: Fiona Ebner --- New in v2. PVE/QemuConfig.pm | 40 ++++++++++++++++++++++++++++++++++++++++ PVE/VZDump/QemuServer.pm | 31 ++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm index ca30eda0..fcfce1fb 100644 --- a/PVE/QemuConfig.pm +++ b/PVE/QemuConfig.pm @@ -13,6 +13,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd); use PVE::QemuServer; use PVE::QemuServer::Machine; use PVE::QemuServer::Memory qw(get_current_memory); +use PVE::RESTEnvironment qw(log_warn); use PVE::Storage; use PVE::Tools; use PVE::Format qw(render_bytes render_duration); @@ -572,4 +573,43 @@ sub has_cloudinit { return $found; } +# Caller is expected to deal with volumes from an already exisitng entry first. +sub record_fleecing_images { + my ($vmid, $volids) = @_; + + return if scalar($volids->@*) == 0; + + PVE::QemuConfig->lock_config($vmid, sub { + my $conf = PVE::QemuConfig->load_config($vmid); + $conf->{'fleecing-images'} = join(',', $volids->@*); + PVE::QemuConfig->write_config($vmid, $conf); + }); +} + +sub cleanup_fleecing_images { + my ($vmid, $storecfg) = @_; + + my $volids = []; + my $failed = []; + + PVE::QemuConfig->lock_config($vmid, sub { + my $conf = PVE::QemuConfig->load_config($vmid); + if ($conf->{'fleecing-images'}) { + $volids = [PVE::Tools::split_list($conf->{'fleecing-images'})]; + delete $conf->{'fleecing-images'}; + PVE::QemuConfig->write_config($vmid, $conf); + } + }); + + for my $volid ($volids->@*) { + eval { PVE::Storage::vdisk_free($storecfg, $volid); }; + if (my $err = $@) { + log_warn("error removing fleecing image '$volid' - $err"); + push $failed->@*, $volid; + } + } + + record_fleecing_images($vmid, $failed); +} + 1; diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm index 8a44a1fd..ad0212bc 100644 --- a/PVE/VZDump/QemuServer.pm +++ b/PVE/VZDump/QemuServer.pm @@ -527,15 +527,25 @@ sub get_and_check_pbs_encryption_config { die "internal error - unhandled case for getting & checking PBS encryption ($keyfile, $master_keyfile)!"; } +# Helper is intended to be called from allocate_fleecing_images() only. Otherwise, fleecing volids +# have already been recorded in the configuration and PVE::QemuConfig::cleanup_fleecing_images() +# should be used instead. my sub cleanup_fleecing_images { - my ($self, $disks) = @_; + my ($self, $vmid, $disks) = @_; + + my $failed = []; for my $di ($disks->@*) { if (my $volid = $di->{'fleece-volid'}) { eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); }; - $self->log('warn', "error removing fleecing image '$volid' - $@") if $@; + if (my $err = $@) { + $self->log('warn', "error removing fleecing image '$volid' - $err"); + push $failed->@*, $volid; + } } } + + PVE::QemuConfig::record_fleecing_images($vmid, $failed); } my sub allocate_fleecing_images { @@ -543,8 +553,7 @@ my sub allocate_fleecing_images { die "internal error - no fleecing storage specified\n" if !$fleecing_storeid; - # TODO what about potential left-over images from a failed attempt? Just - # auto-remove? While unlikely, could conflict with manually created image from user... + my $fleece_volids = []; eval { my $n = 0; # counter for fleecing image names @@ -561,6 +570,8 @@ my sub allocate_fleecing_images { $di->{'fleece-volid'} = PVE::Storage::vdisk_alloc( $self->{storecfg}, $fleecing_storeid, $vmid, $format, $name, $size); + push $fleece_volids->@*, $di->{'fleece-volid'}; + $n++; } else { die "implement me (type '$di->{type}')"; @@ -568,9 +579,11 @@ my sub allocate_fleecing_images { } }; if (my $err = $@) { - cleanup_fleecing_images($self, $disks); + cleanup_fleecing_images($self, $vmid, $disks); die $err; } + + PVE::QemuConfig::record_fleecing_images($vmid, $fleece_volids); } my sub detach_fleecing_images { @@ -630,6 +643,10 @@ my sub check_and_prepare_fleecing { $use_fleecing = 0; } + # clean up potential left-overs from a previous attempt + eval { PVE::QemuConfig::cleanup_fleecing_images($vmid, $self->{storecfg}); }; + $self->log('warn', "attempt to clean up left-over fleecing images failed - $@") if $@; + if ($use_fleecing) { my ($default_format, $valid_formats) = PVE::Storage::storage_default_format( $self->{storecfg}, $fleecing_opts->{storage}); @@ -791,7 +808,7 @@ sub archive_pbs { if ($use_fleecing) { detach_fleecing_images($task->{disks}, $vmid); - cleanup_fleecing_images($self, $task->{disks}); + PVE::QemuConfig::cleanup_fleecing_images($vmid, $self->{storecfg}); } die $err if $err; @@ -991,7 +1008,7 @@ sub archive_vma { if ($use_fleecing) { detach_fleecing_images($task->{disks}, $vmid); - cleanup_fleecing_images($self, $task->{disks}); + PVE::QemuConfig::cleanup_fleecing_images($vmid, $self->{storecfg}); } if ($err) { -- 2.39.2