From: Lukas Sichert <l.sichert@proxmox.com>
To: pve-devel@lists.proxmox.com
Cc: Lukas Sichert <l.sichert@proxmox.com>
Subject: [PATCH storage v7 1/4] lvm: saferemove: zero out volumes range by range
Date: Tue, 16 Jun 2026 12:13:17 +0200 [thread overview]
Message-ID: <20260616101323.24981-2-l.sichert@proxmox.com> (raw)
In-Reply-To: <20260616101323.24981-1-l.sichert@proxmox.com>
'saferemove' currently uses different full-volume zero-out paths:
`blkdiscard --zeroout` for devices with write-zeroes support and
`cstream` otherwise. This makes progress and throttling inconsistent and
does not allow future discard cleanup to be interleaved with zeroing. On
thin-provisioned backing storage, zeroing the whole LV before discarding
it can also force unnecessary allocation.
Move zeroing into an explicit range loop. Use BLKZEROOUT when supported,
capped to the device limit, and fall back to manual zero writes
otherwise. Keep progress and throttling in the shared loop, and keep the
renamed LV if zeroing fails so cleanup can be retried.
Signed-off-by: Lukas Sichert <l.sichert@proxmox.com>
---
src/PVE/Storage/LVMPlugin.pm | 168 ++++++++++++++++++++++++-----------
1 file changed, 117 insertions(+), 51 deletions(-)
diff --git a/src/PVE/Storage/LVMPlugin.pm b/src/PVE/Storage/LVMPlugin.pm
index 443d292..f0a7a80 100644
--- a/src/PVE/Storage/LVMPlugin.pm
+++ b/src/PVE/Storage/LVMPlugin.pm
@@ -13,11 +13,16 @@ use PVE::Tools qw(run_command file_read_firstline trim);
use PVE::Storage::Common;
use PVE::Storage::Plugin;
+use PVE::RESTEnvironment qw(log_warn);
use base qw(PVE::Storage::Plugin);
# lvm helper functions
+use constant {
+ BLKZEROOUT => 0x127f,
+};
+
my $ignore_no_medium_warnings = sub {
my $line = shift;
# ignore those, most of the time they're from (virtual) IPMI/iKVM devices
@@ -279,6 +284,13 @@ sub lvm_list_volumes {
return $lvs;
}
+my sub blockdev_ioctl_range {
+ my ($fh, $ioctl_name, $ioctl, $offset, $length) = @_;
+
+ my $range = pack('QQ', $offset, $length);
+ ioctl($fh, $ioctl, $range) or die "$ioctl_name failed - $!\n";
+}
+
my sub free_lvm_volumes_locked {
my ($class, $scfg, $storeid, $volnames) = @_;
@@ -304,49 +316,97 @@ my sub free_lvm_volumes_locked {
file_read_firstline("$sysdir/queue/write_zeroes_max_bytes") // 0;
($write_zeroes_max_bytes) = $write_zeroes_max_bytes =~ m/^(\d+)$/; #untaint
+ my $size = file_read_firstline("$sysdir/size") // 0;
+ ($size) = $size =~ m/^(\d+)$/; # untaint
+ $size *= 512; # sysfs size is in 512-byte sectors
+
+ my $zeroout_variant = 'blkzeroout';
+
+ # If the storage does not support write_zeroes fall back to writing zeroes manually using
+ # syswrite. Otherwise if the storage supports write_zeroes but stepsize is too big, reduce the stepsize to
+ # the maximum supported by the storage.
if ($write_zeroes_max_bytes == 0) {
- # If the storage does not support 'write zeroes', we fallback to cstream.
- # wipe throughput up to 10MB/s by default; may be overwritten with saferemove_throughput
- my $throughput = '-10485760';
- if ($scfg->{saferemove_throughput}) {
- $throughput = $scfg->{saferemove_throughput};
- }
+ print "falling back to syswrite to zero-out '$lvmpath'\n";
+ $stepsize = 1024 * 1024; # 1 MiB
+ $zeroout_variant = 'syswrite';
+ } elsif ($stepsize > $write_zeroes_max_bytes) {
+ print "reduce stepsize to the maximum supported by the storage:"
+ . " $write_zeroes_max_bytes bytes\n";
+ $stepsize = $write_zeroes_max_bytes;
+ }
+ my $zeroes = "\0" x $stepsize;
+ my $throughput = -1;
+ if ($scfg->{saferemove_throughput}) {
+ # use abs as legacy cstream accepted negative values
+ $throughput = abs($scfg->{saferemove_throughput});
+ }
- my $cmd = [
- '/usr/bin/cstream',
- '-i',
- '/dev/zero',
- '-o',
- $lvmpath,
- '-T',
- '10',
- '-v',
- '1',
- '-b',
- '1048576',
- '-t',
- "$throughput",
- ];
- eval {
- run_command(
- $cmd,
- errmsg => "zero out finished (note: 'No space left on device' is ok here)",
- );
- };
- warn $@ if $@;
- } else {
- # If the storage supports write_zeroes but stepsize is too big, reduce the stepsize to
- # the maximum supported by the storage.
- if ($write_zeroes_max_bytes > 0 && $stepsize > $write_zeroes_max_bytes) {
- print "reduce stepsize to the maximum supported by the storage:"
- . " $write_zeroes_max_bytes bytes\n";
+ open(my $fh, '+<', $lvmpath) or die "can't open '$lvmpath' - $!\n";
- $stepsize = $write_zeroes_max_bytes;
- }
+ #eval block, so that filehandle is closed even if something fails below
+ eval {
+ my $start = time();
+ my $written_total = 0;
+ my $lastprint = -1;
+ for (my $offset = 0; $offset < $size; $offset += $stepsize) {
+
+ if ($offset + $stepsize > $size) {
+ $stepsize = $size - $offset;
+ }
+
+ if ($zeroout_variant eq 'blkzeroout') {
+ eval {
+ blockdev_ioctl_range($fh, 'BLKZEROOUT', BLKZEROOUT, $offset, $stepsize);
+ };
+ if ($@) {
+ die "blkzeroout failed: $@";
+ }
+ } elsif ($zeroout_variant eq 'syswrite') {
+ # if the $offset is 0, sysseek can return 0, therefore use // to only
+ # throw an error, if it returns undef
+ sysseek($fh, $offset, 0) // die "sysseek failed: $!\n";
- my $cmd = ['blkdiscard', $lvmpath, '-v', '--zeroout', '--step', "${stepsize}"];
- eval { run_command($cmd); };
- warn $@ if $@;
+ my $written = syswrite($fh, $zeroes, $stepsize)
+ // die "syswrite failed: $!\n";
+
+ if ($written != $stepsize) {
+ die "short syswrite: wrote $written of $stepsize bytes\n";
+ }
+ }
+ $written_total += $stepsize;
+
+ my $curr_time = time();
+ if (($curr_time - $lastprint) >= 3) {
+ my $percent_finished = 100 * $written_total / $size;
+ my $written_gb = $written_total / (1024**3);
+ my $size_gb = $size / (1024**3);
+ my $curr_seconds = $curr_time - $start;
+ printf(
+ "zeroed out %.2f GiB of %.2f GiB (%.2f%%) using %s in %d seconds\n",
+ $written_gb,
+ $size_gb,
+ $percent_finished,
+ $zeroout_variant,
+ $curr_seconds,
+ );
+ $lastprint = $curr_time;
+ }
+
+ if ($throughput > 0) {
+ my $expected_elapsed = $written_total / $throughput;
+ my $actual_elapsed = $curr_time - $start;
+ my $delay = $expected_elapsed - $actual_elapsed;
+ if ($delay > 0) {
+ sleep($delay);
+ }
+ }
+ }
+ };
+ # close filehandle before throwing an error
+ my $err = $@;
+ close($fh);
+ if ($err) {
+ die "zeroing out failed: $err";
}
};
@@ -368,18 +428,24 @@ my sub free_lvm_volumes_locked {
errmsg => "can't refresh LV '$lvmpath' to zero-out its data",
);
- $secure_delete_cmd->($lvmpath);
-
- $class->cluster_lock_storage(
- $storeid,
- $scfg->{shared},
- undef,
- sub {
- my $cmd = ['/sbin/lvremove', '-f', "$vg/del-$name"];
- run_command($cmd, errmsg => "lvremove '$vg/del-$name' error");
- },
- );
- print "successfully removed volume $name ($vg/del-$name)\n";
+ my $err = undef;
+ eval { $secure_delete_cmd->($lvmpath); };
+ $err = $@ if $@;
+
+ if (!$err) {
+ $class->cluster_lock_storage(
+ $storeid,
+ $scfg->{shared},
+ undef,
+ sub {
+ my $cmd = ['/sbin/lvremove', '-f', "$vg/del-$name"];
+ run_command($cmd, errmsg => "lvremove '$vg/del-$name' error");
+ },
+ );
+ print "successfully removed volume $name ($vg/del-$name)\n";
+ } else {
+ log_warn("$vg/del-$name is not being removed: $err");
+ }
}
};
--
2.47.3
next prev parent reply other threads:[~2026-06-16 10:13 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-16 10:13 [PATCH docs/manager/storage v7 0/4] fix #7339: lvmthick: add option to free storage for deleted VMs Lukas Sichert
2026-06-16 10:13 ` Lukas Sichert [this message]
2026-06-16 10:13 ` [PATCH storage v7 2/4] fix #7339: lvm: add discard action for removed volumes Lukas Sichert
2026-06-16 10:13 ` [PATCH manager v7 3/4] fix #7339: lvmthick: ui: add UI option to free storage Lukas Sichert
2026-06-16 10:13 ` [PATCH docs v7 4/4] fix #7339: lvm: document discard option Lukas Sichert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260616101323.24981-2-l.sichert@proxmox.com \
--to=l.sichert@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.