From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 4FB271FF14C for ; Fri, 29 May 2026 14:58:39 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id BD38FABF7; Fri, 29 May 2026 14:58:30 +0200 (CEST) From: Erik Fastermann To: pve-devel@lists.proxmox.com Subject: [PATCH qemu-server 2/3] fix #1989: disk: add qcow2 cache options Date: Fri, 29 May 2026 14:58:07 +0200 Message-ID: <20260529125808.204983-3-e.fastermann@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260529125808.204983-1-e.fastermann@proxmox.com> References: <20260529125808.204983-1-e.fastermann@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.022 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment KAM_LAZY_DOMAIN_SECURITY 1 Sending domain does not have any anti-forgery methods RDNS_NONE 0.793 Delivered to internal network by a host with no rDNS SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_NONE 0.001 SPF: sender does not publish an SPF Record Message-ID-Hash: LVCLY4WWZXN6RKJ5QSI3FY37JLQ7PYAL X-Message-ID-Hash: LVCLY4WWZXN6RKJ5QSI3FY37JLQ7PYAL X-MailFrom: efastermann@ruth.proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Erik Fastermann X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Add multiple options to configure the qcow2 L2/refcount cache. This enables huge performance gains in some cases. For a detailed explanation of the options see the QEMU docs [0]. Additionally the cache size can be configured based on the size of the disk image automatically. Both blockdev and the older drive commandline are supported, which makes the feature accessible to all QEMU machine versions supported by PVE. Options which only apply to disk creation (cluster_size, refcount_bits) are not considered in this patch. [0] https://gitlab.com/qemu-project/qemu/-/blob/master/docs/qcow2-cache.txt Signed-off-by: Erik Fastermann --- src/PVE/QemuServer.pm | 33 ++++++++++++ src/PVE/QemuServer/Blockdev.pm | 33 ++++++++++++ src/PVE/QemuServer/Drive.pm | 99 ++++++++++++++++++++++++++++++++++ 3 files changed, 165 insertions(+) diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm index 118f26bc..e53ae760 100644 --- a/src/PVE/QemuServer.pm +++ b/src/PVE/QemuServer.pm @@ -1294,6 +1294,39 @@ sub print_drive_commandline_full { $opts .= ",auto-remove=on"; } + if (defined($drive->{cache_size})) { + if ($drive->{cache_size} eq 'based-on-disk') { + # Automatically set the cache-size based on the disk size. The formula + # is adapted from here: + # https://gitlab.com/qemu-project/qemu/-/blob/master/docs/qcow2-cache.txt#L97-98 + # Assumes the default values for cluster_size and refcount_bits and + # that extended L2 entries are not used. As this combines the refcount + # and L2 cache sizes, QEMU is free to choose a different value for each + # of them, which means the L2 cache is not necessarily 4 times bigger + # than the refcount cache. + my $cache_size = $drive->{size} / 1024 / 1024 / 1024 * (131072 + 32768); + $opts .= ",cache-size=" . int($cache_size); + } else { + $opts .= ",cache-size=" . int($drive->{cache_size}); + } + } + + if (defined($drive->{l2_cache_size})) { + $opts .= ",cache-size=" . int($drive->{l2_cache_size}); + } + + if (defined($drive->{l2_cache_entry_size})) { + $opts .= ",l2-cache-entry-size=" . int($drive->{l2_cache_entry_size}); + } + + if (defined($drive->{refcount_cache_size})) { + $opts .= ",refcount-cache-size=" . int($drive->{refcount_cache_size}); + } + + if (defined($drive->{cache_clean_interval})) { + $opts .= ",cache-clean-interval=" . int($drive->{cache_clean_interval}); + } + # my $file_param = $live_restore_name ? "file.file.filename" : "file"; my $file_param = "file"; if ($live_restore_name) { diff --git a/src/PVE/QemuServer/Blockdev.pm b/src/PVE/QemuServer/Blockdev.pm index 101c747c..7cb7dccb 100644 --- a/src/PVE/QemuServer/Blockdev.pm +++ b/src/PVE/QemuServer/Blockdev.pm @@ -403,6 +403,39 @@ sub generate_format_blockdev { $blockdev->{'discard-no-unref'} = JSON::true if $format eq 'qcow2'; } + if (defined($drive->{cache_size})) { + if ($drive->{cache_size} eq 'based-on-disk') { + # Automatically set the cache-size based on the disk size. The formula + # is adapted from here: + # https://gitlab.com/qemu-project/qemu/-/blob/master/docs/qcow2-cache.txt#L97-98 + # Assumes the default values for cluster_size and refcount_bits and + # that extended L2 entries are not used. As this combines the refcount + # and L2 cache sizes, QEMU is free to choose a different value for each + # of them, which means the L2 cache is not necessarily 4 times bigger + # than the refcount cache. + my $cache_size = $drive->{size} / 1024 / 1024 / 1024 * (131072 + 32768); + $blockdev->{'cache-size'} = int($cache_size); + } else { + $blockdev->{'cache-size'} = int($drive->{cache_size}); + } + } + + if (defined($drive->{l2_cache_size})) { + $blockdev->{'l2-cache-size'} = int($drive->{l2_cache_size}); + } + + if (defined($drive->{l2_cache_entry_size})) { + $blockdev->{'l2-cache-entry-size'} = int($drive->{l2_cache_entry_size}); + } + + if (defined($drive->{refcount_cache_size})) { + $blockdev->{'refcount-cache-size'} = int($drive->{refcount_cache_size}); + } + + if (defined($drive->{cache_clean_interval})) { + $blockdev->{'cache-clean-interval'} = int($drive->{cache_clean_interval}); + } + return $blockdev; } diff --git a/src/PVE/QemuServer/Drive.pm b/src/PVE/QemuServer/Drive.pm index b80b7dbb..ff4be684 100644 --- a/src/PVE/QemuServer/Drive.pm +++ b/src/PVE/QemuServer/Drive.pm @@ -256,6 +256,51 @@ my %drivedesc_base = ( optional => 1, default => 0, }, + cache_size => { + type => 'string', + oneOf => [ + { + type => 'integer', + minimum => 1, + description => 'Cache size in bytes', + optional => 1, + }, + { + type => 'string', + enum => ['based-on-disk'], + description => + 'Automatically pick a cache size based on the configured disk size', + optional => 1, + }, + ], + description => 'Cache size for qcow2 disks in bytes', + typetext => '( | based-on-disk)', + optional => 1, + }, + l2_cache_size => { + type => 'integer', + minimum => 1, + description => 'L2 cache size for qcow2 disks in bytes', + optional => 1, + }, + l2_cache_entry_size => { + type => 'integer', + minimum => 512, + description => 'L2 cache entry size for qcow2 disks in bytes', + optional => 1, + }, + refcount_cache_size => { + type => 'integer', + minimum => 1, + description => 'Refcount cache size for qcow2 disks in bytes', + optional => 1, + }, + cache_clean_interval => { + type => 'integer', + minimum => 0, + description => 'Cache clean interval for qcow2 disks in seconds', + optional => 1, + }, ); my %iothread_fmt = ( @@ -838,6 +883,60 @@ sub parse_drive { } } + my $is_qcow2 = + (defined($res->{format}) && $res->{format} eq 'qcow2') || $res->{file} =~ /\.qcow2$/; + + my $any_qcow2_cache_option = + defined($res->{cache_size}) + || defined($res->{l2_cache_size}) + || defined($res->{l2_cache_entry_size}) + || defined($res->{refcount_cache_size}) + || defined($res->{cache_clean_interval}); + + if ($any_qcow2_cache_option && !$is_qcow2) { + warn "qcow2 cache options require disk format qcow2\n"; + ++$error; + } + + my $cache_option_count = + defined($res->{cache_size}) + + defined($res->{l2_cache_size}) + + defined($res->{refcount_cache_size}); + + if ($cache_option_count > 2) { + warn "at most two of {,l2_,refcount_}cache_size can be set simultaneously\n"; + ++$error; + } + + sub is_power_of_two { + my ($n) = @_; + return $n > 0 && (($n & ($n - 1)) == 0); + } + + if (defined($res->{l2_cache_entry_size}) && !is_power_of_two($res->{l2_cache_entry_size})) { + warn "l2_cache_entry_size must be a power of two\n"; + ++$error; + } + + if (defined($res->{cache_size}) && $res->{cache_size} eq 'based-on-disk') { + if (defined($res->{l2_cache_size}) || defined($res->{refcount_cache_size})) { + warn "cache_size 'based-on-disk' not compatible with l2 or refcount cache size set\n"; + ++$error; + } + } + + if (defined($res->{cache_size}) && $res->{cache_size} ne 'based-on-disk') { + if (($res->{l2_cache_size} // 0) >= $res->{cache_size}) { + warn "l2_cache_size is larger than or equal to cache_size\n"; + ++$error; + } + + if (($res->{refcount_cache_size} // 0) >= $res->{cache_size}) { + warn "refcount_cache_size is larger than or equal to cache_size\n"; + ++$error; + } + } + return if $error; return if $res->{mbps_rd} && $res->{mbps}; -- 2.47.3