From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id ED4A11FF141 for ; Tue, 16 Jun 2026 12:13:37 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 0733B27E7; Tue, 16 Jun 2026 12:13:35 +0200 (CEST) From: Lukas Sichert To: pve-devel@lists.proxmox.com Subject: [PATCH storage v7 1/4] lvm: saferemove: zero out volumes range by range Date: Tue, 16 Jun 2026 12:13:17 +0200 Message-ID: <20260616101323.24981-2-l.sichert@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260616101323.24981-1-l.sichert@proxmox.com> References: <20260616101323.24981-1-l.sichert@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1781604755011 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.274 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: ZLTBLCCZRYCHXQ6RR3UPIUZEVON7ODOI X-Message-ID-Hash: ZLTBLCCZRYCHXQ6RR3UPIUZEVON7ODOI X-MailFrom: l.sichert@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Lukas Sichert X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: 'saferemove' currently uses different full-volume zero-out paths: `blkdiscard --zeroout` for devices with write-zeroes support and `cstream` otherwise. This makes progress and throttling inconsistent and does not allow future discard cleanup to be interleaved with zeroing. On thin-provisioned backing storage, zeroing the whole LV before discarding it can also force unnecessary allocation. Move zeroing into an explicit range loop. Use BLKZEROOUT when supported, capped to the device limit, and fall back to manual zero writes otherwise. Keep progress and throttling in the shared loop, and keep the renamed LV if zeroing fails so cleanup can be retried. Signed-off-by: Lukas Sichert --- src/PVE/Storage/LVMPlugin.pm | 168 ++++++++++++++++++++++++----------- 1 file changed, 117 insertions(+), 51 deletions(-) diff --git a/src/PVE/Storage/LVMPlugin.pm b/src/PVE/Storage/LVMPlugin.pm index 443d292..f0a7a80 100644 --- a/src/PVE/Storage/LVMPlugin.pm +++ b/src/PVE/Storage/LVMPlugin.pm @@ -13,11 +13,16 @@ use PVE::Tools qw(run_command file_read_firstline trim); use PVE::Storage::Common; use PVE::Storage::Plugin; +use PVE::RESTEnvironment qw(log_warn); use base qw(PVE::Storage::Plugin); # lvm helper functions +use constant { + BLKZEROOUT => 0x127f, +}; + my $ignore_no_medium_warnings = sub { my $line = shift; # ignore those, most of the time they're from (virtual) IPMI/iKVM devices @@ -279,6 +284,13 @@ sub lvm_list_volumes { return $lvs; } +my sub blockdev_ioctl_range { + my ($fh, $ioctl_name, $ioctl, $offset, $length) = @_; + + my $range = pack('QQ', $offset, $length); + ioctl($fh, $ioctl, $range) or die "$ioctl_name failed - $!\n"; +} + my sub free_lvm_volumes_locked { my ($class, $scfg, $storeid, $volnames) = @_; @@ -304,49 +316,97 @@ my sub free_lvm_volumes_locked { file_read_firstline("$sysdir/queue/write_zeroes_max_bytes") // 0; ($write_zeroes_max_bytes) = $write_zeroes_max_bytes =~ m/^(\d+)$/; #untaint + my $size = file_read_firstline("$sysdir/size") // 0; + ($size) = $size =~ m/^(\d+)$/; # untaint + $size *= 512; # sysfs size is in 512-byte sectors + + my $zeroout_variant = 'blkzeroout'; + + # If the storage does not support write_zeroes fall back to writing zeroes manually using + # syswrite. Otherwise if the storage supports write_zeroes but stepsize is too big, reduce the stepsize to + # the maximum supported by the storage. if ($write_zeroes_max_bytes == 0) { - # If the storage does not support 'write zeroes', we fallback to cstream. - # wipe throughput up to 10MB/s by default; may be overwritten with saferemove_throughput - my $throughput = '-10485760'; - if ($scfg->{saferemove_throughput}) { - $throughput = $scfg->{saferemove_throughput}; - } + print "falling back to syswrite to zero-out '$lvmpath'\n"; + $stepsize = 1024 * 1024; # 1 MiB + $zeroout_variant = 'syswrite'; + } elsif ($stepsize > $write_zeroes_max_bytes) { + print "reduce stepsize to the maximum supported by the storage:" + . " $write_zeroes_max_bytes bytes\n"; + $stepsize = $write_zeroes_max_bytes; + } + my $zeroes = "\0" x $stepsize; + my $throughput = -1; + if ($scfg->{saferemove_throughput}) { + # use abs as legacy cstream accepted negative values + $throughput = abs($scfg->{saferemove_throughput}); + } - my $cmd = [ - '/usr/bin/cstream', - '-i', - '/dev/zero', - '-o', - $lvmpath, - '-T', - '10', - '-v', - '1', - '-b', - '1048576', - '-t', - "$throughput", - ]; - eval { - run_command( - $cmd, - errmsg => "zero out finished (note: 'No space left on device' is ok here)", - ); - }; - warn $@ if $@; - } else { - # If the storage supports write_zeroes but stepsize is too big, reduce the stepsize to - # the maximum supported by the storage. - if ($write_zeroes_max_bytes > 0 && $stepsize > $write_zeroes_max_bytes) { - print "reduce stepsize to the maximum supported by the storage:" - . " $write_zeroes_max_bytes bytes\n"; + open(my $fh, '+<', $lvmpath) or die "can't open '$lvmpath' - $!\n"; - $stepsize = $write_zeroes_max_bytes; - } + #eval block, so that filehandle is closed even if something fails below + eval { + my $start = time(); + my $written_total = 0; + my $lastprint = -1; + for (my $offset = 0; $offset < $size; $offset += $stepsize) { + + if ($offset + $stepsize > $size) { + $stepsize = $size - $offset; + } + + if ($zeroout_variant eq 'blkzeroout') { + eval { + blockdev_ioctl_range($fh, 'BLKZEROOUT', BLKZEROOUT, $offset, $stepsize); + }; + if ($@) { + die "blkzeroout failed: $@"; + } + } elsif ($zeroout_variant eq 'syswrite') { + # if the $offset is 0, sysseek can return 0, therefore use // to only + # throw an error, if it returns undef + sysseek($fh, $offset, 0) // die "sysseek failed: $!\n"; - my $cmd = ['blkdiscard', $lvmpath, '-v', '--zeroout', '--step', "${stepsize}"]; - eval { run_command($cmd); }; - warn $@ if $@; + my $written = syswrite($fh, $zeroes, $stepsize) + // die "syswrite failed: $!\n"; + + if ($written != $stepsize) { + die "short syswrite: wrote $written of $stepsize bytes\n"; + } + } + $written_total += $stepsize; + + my $curr_time = time(); + if (($curr_time - $lastprint) >= 3) { + my $percent_finished = 100 * $written_total / $size; + my $written_gb = $written_total / (1024**3); + my $size_gb = $size / (1024**3); + my $curr_seconds = $curr_time - $start; + printf( + "zeroed out %.2f GiB of %.2f GiB (%.2f%%) using %s in %d seconds\n", + $written_gb, + $size_gb, + $percent_finished, + $zeroout_variant, + $curr_seconds, + ); + $lastprint = $curr_time; + } + + if ($throughput > 0) { + my $expected_elapsed = $written_total / $throughput; + my $actual_elapsed = $curr_time - $start; + my $delay = $expected_elapsed - $actual_elapsed; + if ($delay > 0) { + sleep($delay); + } + } + } + }; + # close filehandle before throwing an error + my $err = $@; + close($fh); + if ($err) { + die "zeroing out failed: $err"; } }; @@ -368,18 +428,24 @@ my sub free_lvm_volumes_locked { errmsg => "can't refresh LV '$lvmpath' to zero-out its data", ); - $secure_delete_cmd->($lvmpath); - - $class->cluster_lock_storage( - $storeid, - $scfg->{shared}, - undef, - sub { - my $cmd = ['/sbin/lvremove', '-f', "$vg/del-$name"]; - run_command($cmd, errmsg => "lvremove '$vg/del-$name' error"); - }, - ); - print "successfully removed volume $name ($vg/del-$name)\n"; + my $err = undef; + eval { $secure_delete_cmd->($lvmpath); }; + $err = $@ if $@; + + if (!$err) { + $class->cluster_lock_storage( + $storeid, + $scfg->{shared}, + undef, + sub { + my $cmd = ['/sbin/lvremove', '-f', "$vg/del-$name"]; + run_command($cmd, errmsg => "lvremove '$vg/del-$name' error"); + }, + ); + print "successfully removed volume $name ($vg/del-$name)\n"; + } else { + log_warn("$vg/del-$name is not being removed: $err"); + } } }; -- 2.47.3