From: Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com>
To: pve-devel@lists.proxmox.com
Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
Subject: [pve-devel] [PATCH qemu-server 1/6] lvmqcow2: set disk write threshold
Date: Mon, 26 Aug 2024 13:00:20 +0200 [thread overview]
Message-ID: <mailman.399.1724670042.302.pve-devel@lists.proxmox.com> (raw)
In-Reply-To: <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
[-- Attachment #1: Type: message/rfc822, Size: 7723 bytes --]
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH qemu-server 1/6] lvmqcow2: set disk write threshold
Date: Mon, 26 Aug 2024 13:00:20 +0200
Message-ID: <20240826110030.1744732-3-alexandre.derumier@groupe-cyllene.com>
On VM start, or when a disk is hotplugged, we set a write threshold.
The threshold is: size of the lvm - (chunk_usage_percent * chunksize)
QEMU will emit an event when the VM writes at an offset higher than the threshold,
and the counter is reset to 0.
(So we'll need to set the threshold again when we extend the disk.)
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
PVE/QemuServer.pm | 97 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 88c274d..3acb32e 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -4260,6 +4260,7 @@ sub vm_deviceplug {
warn $@ if $@;
die $err;
}
+ eval { set_disks_write_threshold($storecfg, $conf->{pending}, $vmid, $deviceid) };
} elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
@@ -4289,6 +4290,7 @@ sub vm_deviceplug {
warn $@ if $@;
die $err;
}
+ eval { PVE::QemuServer::set_disks_write_threshold($storecfg, $conf->{pending}, $vmid, $deviceid); };
} elsif ($deviceid =~ m/^(net)(\d+)$/) {
return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);
@@ -6069,6 +6071,7 @@ sub vm_start_nolock {
qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
}
add_nets_bridge_fdb($conf, $vmid);
+ PVE::QemuServer::set_disks_write_threshold($storecfg, $conf, $vmid);
}
if (!defined($conf->{balloon}) || $conf->{balloon}) {
@@ -8829,4 +8832,98 @@ sub delete_ifaces_ipams_ips {
}
}
+# Set the write threshold of a single block node through the QEMU monitor.
+#
+# Arguments, in order: the VM id, the node-name of the block node and the
+# threshold value (truncated with int() before it is sent).
+# Prints a "set threshold" line and then issues the
+# "block-set-write-threshold" monitor command.
+sub qemu_block_set_write_threshold {
+    my ($vmid, $node, $limit) = @_;
+
+    print"set threshold $node $limit\n";
+
+    my @qmp_args = (
+        'node-name' => $node,
+        'write-threshold' => int($limit),
+    );
+
+    PVE::QemuServer::mon_cmd($vmid, "block-set-write-threshold", @qmp_args);
+}
+
+# Resolve a disk to its device id, block node name and highest written offset.
+#
+# $disk is either a block node name (anything matching /block/) or a drive
+# id that is looked up as "drive-$disk" in $block_info. $block_info is a hash
+# keyed by device name whose entries carry nested 'parent' nodes.
+# $vmid is accepted for interface compatibility but not used here.
+#
+# Returns a hash reference with the keys deviceid, blocknodeid and
+# wr_highest_offset (0 when no statistic is available); dies if the block
+# node or the device id cannot be determined.
+sub get_block_info {
+    my ($vmid, $disk, $block_info) = @_;
+
+    my $res = { deviceid => undef, blocknodeid => undef, wr_highest_offset => 0 };
+
+    # fetch the 'parent' and 'parent.parent' nodes of an entry without
+    # autovivifying missing levels inside the caller's data
+    my $get_ancestors = sub {
+        my ($entry) = @_;
+        my $parent = $entry ? $entry->{parent} : undef;
+        my $grandparent = $parent ? $parent->{parent} : undef;
+        return ($parent, $grandparent);
+    };
+
+    my $highest_offset = sub {
+        my ($node) = @_;
+        my $stats = $node->{stats} // {};
+        return $stats->{wr_highest_offset} // 0;
+    };
+
+    if ($disk =~ m/block/) {
+        $res->{blocknodeid} = $disk;
+        for my $id (keys %$block_info) {
+            # the direct parent takes precedence over the grandparent; nodes
+            # without a node-name are skipped instead of compared against undef
+            my ($node) = grep {
+                $_ && defined($_->{'node-name'}) && $_->{'node-name'} eq $disk
+            } $get_ancestors->($block_info->{$id});
+            next if !$node;
+
+            $res->{deviceid} = $id =~ s/^drive-//r;
+            $res->{wr_highest_offset} = $highest_offset->($node);
+            last;
+        }
+    } else {
+        $res->{deviceid} = $disk;
+        # when a backup is running the image chain is different, so the
+        # grandparent is checked before the direct parent
+        my ($node) = grep {
+            $_
+                && $_->{'node-name'}
+                && (($_->{'driver-specific'} // {})->{driver} // '') eq 'host_device'
+        } reverse $get_ancestors->($block_info->{"drive-$disk"});
+
+        if ($node) {
+            $res->{blocknodeid} = $node->{'node-name'};
+            $res->{wr_highest_offset} = $highest_offset->($node);
+        }
+    }
+
+    die "can't find blocknodeid" if !$res->{blocknodeid};
+    die "can't find deviceid" if !$res->{deviceid};
+    return $res;
+}
+
+# Compute the write threshold for a volume of the given size.
+#
+# The threshold is $size minus a reserved margin of
+# chunksize * chunk_pct_extension, both read from the storage config $scfg
+# (defaults: 1024 * 1024 * 1024 and 0.5).
+# NOTE(review): the result is neither rounded nor clamped here, so it is
+# negative whenever $size is smaller than the reserved margin.
+sub compute_write_threshold {
+    my ($size, $scfg) = @_;
+
+    my $chunk_bytes = $scfg->{chunksize} // (1024 * 1024 * 1024);
+    my $extension_ratio = $scfg->{chunk_pct_extension} // 0.5;
+    my $reserved = $chunk_bytes * $extension_ratio;
+
+    return $size - $reserved;
+}
+
+# Set the write threshold on the lvmqcow2-backed volumes of a VM.
+#
+# Arguments: the storage config, the VM config whose volumes are iterated,
+# the VM id and an optional device id (with or without a "drive-" prefix).
+# When a device id is given, only the matching drive is handled.
+# Volumes without a file, without a parseable storage id, or located on a
+# storage whose type is not 'lvmqcow2' are skipped.
+# Dies if the size of a handled volume cannot be determined; errors from the
+# called helpers propagate to the caller.
+sub set_disks_write_threshold {
+    my ($storecfg, $conf, $vmid, $deviceid) = @_;
+
+    $deviceid =~ s/^drive-// if $deviceid;
+
+    # queried lazily, so a VM without any lvmqcow2 volume neither issues nor
+    # can fail on the extra monitor command
+    my $blockstats;
+
+    PVE::QemuConfig->foreach_volume($conf, sub {
+        my ($ds, $drive) = @_;
+
+        return if $deviceid && $ds ne $deviceid;
+
+        my $volid = $drive->{file};
+        return if !$volid;
+
+        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+        return if !$sid;
+
+        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
+        return if $scfg->{type} ne 'lvmqcow2';
+
+        my $size = PVE::Storage::volume_size_info($storecfg, $volid, 5);
+        # without a size the computed threshold would be a bogus negative value
+        die "unable to get size of volume '$volid'\n" if !defined($size);
+        my $threshold = compute_write_threshold($size, $scfg);
+
+        if (!$blockstats) {
+            my $stats = mon_cmd($vmid, "query-blockstats");
+            $blockstats = { map { $_->{device} => $_ } $stats->@* };
+        }
+
+        my $blockinfo = get_block_info($vmid, $ds, $blockstats);
+        qemu_block_set_write_threshold($vmid, $blockinfo->{blocknodeid}, $threshold);
+    });
+}
+
1;
--
2.39.2
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2024-08-26 11:00 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20240826110030.1744732-1-alexandre.derumier@groupe-cyllene.com>
2024-08-26 11:00 ` [pve-devel] [PATCH pve-storage 1/5] add lvmqcow2 plugin Alexandre Derumier via pve-devel
2024-08-26 11:00 ` Alexandre Derumier via pve-devel [this message]
2024-08-26 11:00 ` [pve-devel] [PATCH pve-manager 1/1] pvestatd: lvmqcow2 : extend disk on io-error Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH qemu-server 2/6] qm cli: add blockextend Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH pve-storage 2/5] vdisk_alloc: add underlay_size option Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH pve-storage 3/5] add volume_underlay_resize Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH qemu-server 3/6] qmevent: call qm disk blockextend when write_threshold event is received Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH pve-storage 4/5] add refresh volume Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH qemu-server 4/6] migration: refresh remote disk size before resume Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH pve-storage 5/5] add volume_underlay_shrink Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH qemu-server 5/6] qemu_img_format: lvmqcow2 is a path_storage Alexandre Derumier via pve-devel
2024-08-26 11:00 ` [pve-devel] [PATCH qemu-server 6/6] clone: allocate && shrink lvmcow2 underlay Alexandre Derumier via pve-devel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mailman.399.1724670042.302.pve-devel@lists.proxmox.com \
--to=pve-devel@lists.proxmox.com \
--cc=alexandre.derumier@groupe-cyllene.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox