From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id D425D1FF141 for ; Tue, 16 Jun 2026 12:14:18 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id CF3CB2E33; Tue, 16 Jun 2026 12:14:06 +0200 (CEST) From: Lukas Sichert To: pve-devel@lists.proxmox.com Subject: [PATCH storage v7 2/4] fix #7339: lvm: add discard action for removed volumes Date: Tue, 16 Jun 2026 12:13:18 +0200 Message-ID: <20260616101323.24981-3-l.sichert@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260616101323.24981-1-l.sichert@proxmox.com> References: <20260616101323.24981-1-l.sichert@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1781604755431 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.265 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: XQGVUP4MMXK6HYQM4RYI5UN2Q3MREG4E X-Message-ID-Hash: XQGVUP4MMXK6HYQM4RYI5UN2Q3MREG4E X-MailFrom: l.sichert@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Lukas Sichert X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On LVM storages backed by thin-provisioned SAN LUNs, removing an LV does not release the allocated space on the backing storage. Using LVM's `issue_discards` can avoid that, but makes `lvremove` issue the discards while holding the cluster-wide storage lock, which can hit the lock timeout for large volumes. Add an `on-volume-remove` property with an initial `discard` action. The cleanup worker issues the discard for the renamed LV before the final remove, so the long-running operation happens outside the storage lock. When combined with `saferemove`, the worker zeroes and discards the LV range by range, avoiding allocation of the whole LV with zeroes on thin-provisioned backing storage. Signed-off-by: Lukas Sichert Link: bugzilla.proxmox.com/show_bug.cgi?id=7339 --- src/PVE/Storage/LVMPlugin.pm | 95 ++++++++++++++++++++++++++++++++---- 1 file changed, 85 insertions(+), 10 deletions(-) diff --git a/src/PVE/Storage/LVMPlugin.pm b/src/PVE/Storage/LVMPlugin.pm index f0a7a80..ee07543 100644 --- a/src/PVE/Storage/LVMPlugin.pm +++ b/src/PVE/Storage/LVMPlugin.pm @@ -13,6 +13,7 @@ use PVE::Tools qw(run_command file_read_firstline trim); use PVE::Storage::Common; use PVE::Storage::Plugin; +use PVE::SafeSyslog; use PVE::RESTEnvironment qw(log_warn); use base qw(PVE::Storage::Plugin); @@ -20,6 +21,7 @@ use base qw(PVE::Storage::Plugin); # lvm helper functions use constant { + BLKDISCARD => 0x1277, BLKZEROOUT => 0x127f, }; @@ -296,6 +298,12 @@ my sub free_lvm_volumes_locked { my $vg = $scfg->{vgname}; + my $on_remove_opts; + if ($scfg->{'on-volume-remove'}) { + $on_remove_opts = + PVE::JSONSchema::parse_property_string('on-volume-remove', $scfg->{'on-volume-remove'}); + } + my $secure_delete_cmd = sub { my ($lvmpath) = @_; @@ -373,6 +381,14 @@ my sub free_lvm_volumes_locked { die "short syswrite: wrote $written of $stepsize bytes\n"; } } + if ($on_remove_opts->{discard}) { + eval { + blockdev_ioctl_range($fh, 'BLKDISCARD', BLKDISCARD, $offset, $stepsize); + }; + if ($@) { + log_warn("blkdiscard failed: $@"); + } + } $written_total += $stepsize; my $curr_time = time(); @@ -409,28 +425,49 @@ my sub free_lvm_volumes_locked { die "zeroing out failed: $err"; } }; - # we need to zero out LVM data for security reasons - # and to allow thin provisioning - my $zero_out_worker = sub { + # and discard images to free storage space to allow + # thin provisioning + my $cleanup_worker = sub { + for my $name (@$volnames) { my $lvmpath = "/dev/$vg/del-$name"; - print "zero-out data on image $name ($lvmpath)\n"; + + my $discard_action; + if ($scfg->{saferemove} && $on_remove_opts->{discard}) { + $discard_action = 'zero-out and discard (TRIM)'; + } elsif ($scfg->{saferemove}) { + $discard_action = 'zero-out'; + } elsif ($on_remove_opts->{discard}) { + $discard_action = 'discard (TRIM)'; + } + print "$discard_action data on image $name ($lvmpath)\n"; my $cmd_activate = ['/sbin/lvchange', '-aly', $lvmpath]; run_command( $cmd_activate, - errmsg => "can't activate LV '$lvmpath' to zero-out its data", + errmsg => "can't activate LV '$lvmpath' to $discard_action its data", ); $cmd_activate = ['/sbin/lvchange', '--refresh', $lvmpath]; run_command( $cmd_activate, - errmsg => "can't refresh LV '$lvmpath' to zero-out its data", + errmsg => "can't refresh LV '$lvmpath' to $discard_action its data", ); + syslog('info', "starting to $discard_action $name ($lvmpath)"); my $err = undef; - eval { $secure_delete_cmd->($lvmpath); }; - $err = $@ if $@; + if ($scfg->{saferemove}) { + eval { $secure_delete_cmd->($lvmpath); }; + $err = $@ if $@; + } + + # also don't remove the volume if blkdiscard fails, because otherwise the + # storage on a thinly-backed storage can't be reclaimed without removing the + # complete pool + if ($on_remove_opts->{discard} && !$scfg->{saferemove}) { + eval { run_command(['/sbin/blkdiscard', $lvmpath]); }; + $err = $@ if $@; + } if (!$err) { $class->cluster_lock_storage( @@ -449,13 +486,13 @@ my sub free_lvm_volumes_locked { } }; - if ($scfg->{saferemove}) { + if ($scfg->{saferemove} || $on_remove_opts->{discard}) { for my $name (@$volnames) { # avoid long running task, so we only rename here my $cmd = ['/sbin/lvrename', $vg, $name, "del-$name"]; run_command($cmd, errmsg => "lvrename '$vg/$name' error"); } - return $zero_out_worker; + return $cleanup_worker; } else { for my $name (@$volnames) { my $cmd = ['/sbin/lvremove', '-f', "$vg/$name"]; @@ -478,6 +515,36 @@ sub plugindata { }; } +my $on_volume_remove_format = { + discard => { + description => "Issue discard (TRIM) requests for LVs before removing them.", + type => 'boolean', + optional => 1, + verbose_description => "If enabled, blkdiscard is issued for the LV before removing it." + . " This sends discard (TRIM) requests for the LV's block range, allowing" + . " thin-provisioned storage to reclaim previously allocated physical" + . " space, provided the storage supports discard.", + }, +}; + +sub verify_on_volume_remove { + my ($value, $noerr) = @_; + + return undef if !defined($value); + + if (!keys %$value) { + return undef if $noerr; + die "at least one on-volume-remove option must be specified if the property is set\n"; + } + return $value; +} + +PVE::JSONSchema::register_format( + 'on-volume-remove', + $on_volume_remove_format, + \&verify_on_volume_remove, +); + sub properties { return { vgname => { @@ -494,6 +561,13 @@ sub properties { description => "Zero-out data when removing LVs.", type => 'boolean', }, + 'on-volume-remove' => { + description => "Optional actions when removing LVs.", + type => 'string', + format => 'on-volume-remove', + verbose_description => "Configure actions performed before removing an LV." + . " Use 'discard=1' to issue discard (TRIM) requests before removal.", + }, 'saferemove-stepsize' => { description => "Wipe step size in MiB." . " It will be capped to the maximum supported by the storage.", @@ -519,6 +593,7 @@ sub options { shared => { optional => 1 }, disable => { optional => 1 }, saferemove => { optional => 1 }, + 'on-volume-remove' => { optional => 1 }, 'saferemove-stepsize' => { optional => 1 }, saferemove_throughput => { optional => 1 }, content => { optional => 1 }, -- 2.47.3