From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 1E9441FF142 for ; Fri, 05 Jun 2026 17:35:36 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 6EC001E230; Fri, 5 Jun 2026 17:35:21 +0200 (CEST) From: Erik Fastermann To: pve-devel@lists.proxmox.com Subject: [PATCH qemu-server 1/3] partially fix #1989: disk: add qcow2 cache options Date: Fri, 5 Jun 2026 17:35:10 +0200 Message-ID: <20260605153512.265703-2-e.fastermann@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260605153512.265703-1-e.fastermann@proxmox.com> References: <20260605153512.265703-1-e.fastermann@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.019 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods RDNS_NONE 0.793 Delivered to internal network by a host with no rDNS SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record Message-ID-Hash: NELJQQQHGWLRKLKU7TEL2LJHQJO435QH X-Message-ID-Hash: NELJQQQHGWLRKLKU7TEL2LJHQJO435QH X-MailFrom: efastermann@ruth.proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Erik Fastermann X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Add multiple options to configure the qcow2 
L2/refcount cache. This can provide significant performance gains in some cases. For a detailed explanation of the options see the QEMU docs [0]. Additionally, the cache size can be derived automatically from the size of the disk image. Both blockdev and the older drive command line are supported, which makes the feature accessible to all QEMU machine versions supported by PVE. Options which only apply to disk creation (cluster_size, refcount_bits) are not considered in this patch. [0] https://gitlab.com/qemu-project/qemu/-/blob/master/docs/qcow2-cache.txt Signed-off-by: Erik Fastermann --- src/PVE/QemuServer.pm | 71 ++++++++++++++++++- src/PVE/QemuServer/Blockdev.pm | 44 ++++++++++++ src/PVE/QemuServer/Drive.pm | 122 ++++++++++++++++++++++++++++++++ src/PVE/QemuServer/QemuImage.pm | 24 +++++++ 4 files changed, 260 insertions(+), 1 deletion(-) diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm index 55e9f520..211927db 100644 --- a/src/PVE/QemuServer.pm +++ b/src/PVE/QemuServer.pm @@ -78,6 +78,7 @@ use PVE::QemuServer::Drive qw( parse_drive print_drive storage_allows_io_uring_default + qcow2_cache_size_by_disk_size ); use PVE::QemuServer::DriveDevice qw(print_drivedevice_full scsihw_infos); use PVE::QemuServer::Machine; @@ -1294,6 +1295,49 @@ sub print_drive_commandline_full { $opts .= ",auto-remove=on"; } + for my $qcow2_cache_option ( + 'qcow2-cache-size', + 'qcow2-l2-cache-size', + 'qcow2-l2-cache-entry-size', + 'qcow2-refcount-cache-size', + 'qcow2-cache-clean-interval', + ) { + if (defined($drive->{$qcow2_cache_option}) && $format ne 'qcow2') { + log_warn("$drive_id: $qcow2_cache_option requires disk format qcow2"); + } + } + + if ($format eq 'qcow2') { + if ( + defined($drive->{'qcow2-cache-size-based-on-disk'}) + && $drive->{'qcow2-cache-size-based-on-disk'} + ) { + my $cache_size = qcow2_cache_size_by_disk_size($storecfg, $drive); + $opts .= ",cache-size=" . 
int($cache_size); + } + + if (defined($drive->{'qcow2-cache-size'})) { + $opts .= ",cache-size=" . int($drive->{'qcow2-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-l2-cache-size'})) { + $opts .= ",l2-cache-size=" . int($drive->{'qcow2-l2-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-l2-cache-entry-size'})) { + $opts .= ",l2-cache-entry-size=" . int($drive->{'qcow2-l2-cache-entry-size'}); + } + + if (defined($drive->{'qcow2-refcount-cache-size'})) { + $opts .= + ",refcount-cache-size=" . int($drive->{'qcow2-refcount-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-cache-clean-interval'})) { + $opts .= ",cache-clean-interval=" . int($drive->{'qcow2-cache-clean-interval'}); + } + } + # my $file_param = $live_restore_name ? "file.file.filename" : "file"; my $file_param = "file"; if ($live_restore_name) { @@ -5217,6 +5266,26 @@ sub vmconfig_update_disk { || safe_string_ne($drive->{ssd}, $old_drive->{ssd}) || safe_string_ne($drive->{vendor}, $old_drive->{vendor}) || safe_string_ne($drive->{ro}, $old_drive->{ro}) + || safe_string_ne( + $drive->{'qcow2-cache-size'}, + $old_drive->{'qcow2-cache-size'}, + ) + || safe_num_ne( + $drive->{'qcow2-l2-cache-size'}, + $old_drive->{'qcow2-l2-cache-size'}, + ) + || safe_num_ne( + $drive->{'qcow2-l2-cache-entry-size'}, + $old_drive->{'qcow2-l2-cache-entry-size'}, + ) + || safe_num_ne( + $drive->{'qcow2-refcount-cache-size'}, + $old_drive->{'qcow2-refcount-cache-size'}, + ) + || safe_num_ne( + $drive->{'qcow2-cache-clean-interval'}, + $old_drive->{'qcow2-cache-clean-interval'}, + ) ) { die "skip\n"; } diff --git a/src/PVE/QemuServer/Blockdev.pm b/src/PVE/QemuServer/Blockdev.pm index 101c747c..0dd5067d 100644 --- a/src/PVE/QemuServer/Blockdev.pm +++ b/src/PVE/QemuServer/Blockdev.pm @@ -9,6 +9,7 @@ use File::stat; use JSON; use PVE::JSONSchema qw(json_bool); +use PVE::RESTEnvironment qw(log_warn); use PVE::Storage; use PVE::QemuServer::Drive qw(drive_is_cdrom); @@ -403,6 +404,49 @@ 
sub generate_format_blockdev { $blockdev->{'discard-no-unref'} = JSON::true if $format eq 'qcow2'; } + for my $qcow2_cache_option ( + 'qcow2-cache-size', + 'qcow2-l2-cache-size', + 'qcow2-l2-cache-entry-size', + 'qcow2-refcount-cache-size', + 'qcow2-cache-clean-interval', + ) { + if (defined($drive->{$qcow2_cache_option}) && $format ne 'qcow2') { + log_warn("$drive_id: $qcow2_cache_option requires disk format qcow2"); + } + } + + if ($format eq 'qcow2') { + if ( + defined($drive->{'qcow2-cache-size-based-on-disk'}) + && $drive->{'qcow2-cache-size-based-on-disk'} + ) { + my $cache_size = qcow2_cache_size_by_disk_size($storecfg, $drive); + $blockdev->{'cache-size'} = int($cache_size); + } + + if (defined($drive->{'qcow2-cache-size'})) { + $blockdev->{'cache-size'} = int($drive->{'qcow2-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-l2-cache-size'})) { + $blockdev->{'l2-cache-size'} = int($drive->{'qcow2-l2-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-l2-cache-entry-size'})) { + $blockdev->{'l2-cache-entry-size'} = int($drive->{'qcow2-l2-cache-entry-size'}); + } + + if (defined($drive->{'qcow2-refcount-cache-size'})) { + $blockdev->{'refcount-cache-size'} = + int($drive->{'qcow2-refcount-cache-size'}) * 1024 * 1024; + } + + if (defined($drive->{'qcow2-cache-clean-interval'})) { + $blockdev->{'cache-clean-interval'} = int($drive->{'qcow2-cache-clean-interval'}); + } + } + return $blockdev; } diff --git a/src/PVE/QemuServer/Drive.pm b/src/PVE/QemuServer/Drive.pm index b80b7dbb..e123947e 100644 --- a/src/PVE/QemuServer/Drive.pm +++ b/src/PVE/QemuServer/Drive.pm @@ -7,6 +7,7 @@ use Storable qw(dclone); use IO::File; use List::Util qw(first); +use POSIX; use PVE::RESTEnvironment qw(log_warn); use PVE::Storage; @@ -26,6 +27,7 @@ our @EXPORT_OK = qw( parse_drive print_drive storage_allows_io_uring_default + qcow2_cache_size_by_disk_size ); my $DROPPED_PROPERTIES = ['cyls', 'heads', 'secs', 'trans']; @@ -256,6 +258,46 @@ my 
%drivedesc_base = ( optional => 1, default => 0, }, + 'qcow2-cache-size' => { + type => 'integer', + minimum => 1, + maximum => 10 * 1024, + description => 'Cache size for qcow2 disks in MiB', + optional => 1, + }, + 'qcow2-cache-size-based-on-disk' => { + type => 'boolean', + description => + 'Automatically pick a qcow2 cache size based on the configured disk size', + optional => 1, + }, + 'qcow2-l2-cache-size' => { + type => 'integer', + minimum => 1, + maximum => 10 * 1024, + description => 'L2 cache size for qcow2 disks in MiB', + optional => 1, + }, + 'qcow2-l2-cache-entry-size' => { + type => 'integer', + minimum => 512, + maximum => 2 * 1024 * 1024, + description => 'L2 cache entry size for qcow2 disks in bytes', + optional => 1, + }, + 'qcow2-refcount-cache-size' => { + type => 'integer', + minimum => 1, + maximum => 10 * 1024, + description => 'Refcount cache size for qcow2 disks in MiB', + optional => 1, + }, + 'qcow2-cache-clean-interval' => { + type => 'integer', + minimum => 0, + description => 'Cache clean interval for qcow2 disks in seconds', + optional => 1, + }, ); my %iothread_fmt = ( @@ -838,6 +880,52 @@ sub parse_drive { } } + my $cache_option_count = + defined($res->{'qcow2-cache-size'}) + + defined($res->{'qcow2-l2-cache-size'}) + + defined($res->{'qcow2-refcount-cache-size'}); + + if ($cache_option_count > 2) { + warn "at most two of qcow2-cache-size, qcow2-l2-cache-size, qcow2-refcount-cache-size" + . 
" can be set simultaneously\n"; + ++$error; + } + + if ( + defined($res->{'qcow2-l2-cache-entry-size'}) + && !is_power_of_two($res->{'qcow2-l2-cache-entry-size'}) + ) { + warn "qcow2-l2-cache-entry-size must be a power of two\n"; + ++$error; + } + + if ( + defined($res->{'qcow2-cache-size-based-on-disk'}) + && $res->{'qcow2-cache-size-based-on-disk'} + ) { + if ( + defined($res->{'qcow2-cache-size'}) + || defined($res->{'qcow2-l2-cache-size'}) + || defined($res->{'qcow2-refcount-cache-size'}) + ) { + warn "qcow2-cache-size-based-on-disk is not compatible with cache-size," + . " l2-cache-size or refcount-cache-size being set\n"; + ++$error; + } + } + + if (defined($res->{'qcow2-cache-size'})) { + if (($res->{'qcow2-l2-cache-size'} // 0) >= $res->{'qcow2-cache-size'}) { + warn "qcow2-l2-cache-size is larger than or equal to qcow2-cache-size\n"; + ++$error; + } + + if (($res->{'qcow2-refcount-cache-size'} // 0) >= $res->{'qcow2-cache-size'}) { + warn "qcow2-refcount-cache-size is larger than or equal to qcow2-cache-size\n"; + ++$error; + } + } + return if $error; return if $res->{mbps_rd} && $res->{mbps}; @@ -857,6 +945,11 @@ sub parse_drive { return $res; } +sub is_power_of_two { + my ($n) = @_; + return $n > 0 && (($n & ($n - 1)) == 0); +} + sub print_drive { my ($drive, $with_alloc) = @_; my $skip = ['index', 'interface']; @@ -1179,4 +1272,33 @@ sub drive_qmp_peer { return drive_uses_qsd_fuse($storecfg, $drive) ? qsd_qmp_peer($vmid) : vm_qmp_peer($vmid); } +sub qcow2_cache_size_by_disk_size { + # The calculation is adapted from here: + # https://gitlab.com/qemu-project/qemu/-/blob/master/docs/qcow2-cache.txt#L97-98 + # As this combines the refcount and L2 cache sizes, QEMU is free to + # choose a different value for each of them, which means the L2 cache + # is not necessarily 4 times bigger than the refcount cache with the + # default cluster_size, refcount_bits and no extended L2 entries. 
+ + my ($storecfg, $drive) = @_; + + my $img_info = PVE::QemuServer::QemuImage::info($storecfg, $drive->{file}); + my $cluster_size = $img_info->{'cluster-size'} // 65536; + my $refcount_bits = $img_info->{'format-specific'}->{data}->{'refcount-bits'} // 16; + my $has_extended_l2 = $img_info->{'format-specific'}->{data}->{'extended-l2'} // 0; + + my $l2_multiplier = $has_extended_l2 ? 16 : 8; + my $l2_cache_size = ($drive->{size} * $l2_multiplier) / $cluster_size; + + $l2_cache_size = 2 * $cluster_size if $l2_cache_size < (2 * $cluster_size); + $l2_cache_size = ceil($l2_cache_size / $cluster_size) * $cluster_size; + + my $refcount_cache_size = ($drive->{size} * $refcount_bits) / (8 * $cluster_size); + + $refcount_cache_size = 4 * $cluster_size if $refcount_cache_size < (4 * $cluster_size); + $refcount_cache_size = ceil($refcount_cache_size / $cluster_size) * $cluster_size; + + return $l2_cache_size + $refcount_cache_size; +} + 1; diff --git a/src/PVE/QemuServer/QemuImage.pm b/src/PVE/QemuServer/QemuImage.pm index 71be3abb..9c9db4e8 100644 --- a/src/PVE/QemuServer/QemuImage.pm +++ b/src/PVE/QemuServer/QemuImage.pm @@ -15,6 +15,30 @@ use PVE::QemuServer::Blockdev; use PVE::QemuServer::Drive qw(checked_volume_format); use PVE::QemuServer::Helpers; +sub info { + my ($storecfg, $volid) = @_; + + my $disk_path = PVE::Storage::abs_filesystem_path($storecfg, $volid, 1); + my $cmd = ['/usr/bin/qemu-img', 'info', '--output=json', $disk_path]; + + my $res = eval { + my $output = ''; + PVE::Tools::run_command( + $cmd, + outfunc => sub { + my $line = shift; + $output .= $line; + }, + ); + decode_json($output); + }; + + my $err = $@; + die "qemu-img info failed: $err" if $err; + + return $res; +} + sub convert_iscsi_path { my ($path) = @_; -- 2.47.3