From: Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com>
To: pve-devel@lists.proxmox.com
Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
Subject: [pve-devel] [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax
Date: Mon, 16 Dec 2024 10:12:16 +0100 [thread overview]
Message-ID: <mailman.201.1734340361.332.pve-devel@lists.proxmox.com> (raw)
In-Reply-To: <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com>
[-- Attachment #1: Type: message/rfc822, Size: 19086 bytes --]
From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax
Date: Mon, 16 Dec 2024 10:12:16 +0100
Message-ID: <20241216091229.3142660-3-alexandre.derumier@groupe-cyllene.com>
The blockdev chain is:
-throttle-group-node (drive-(ide|scsi|virtio)x)
- format-node (fmt-drive-x)
- file-node (file-drive -x)
fixme: implement iscsi:// path
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
PVE/QemuServer.pm | 351 +++++++++++++++++++++++++++++++---------------
1 file changed, 237 insertions(+), 114 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 8192599a..2832ed09 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1464,7 +1464,8 @@ sub print_drivedevice_full {
} else {
$device .= ",bus=ahci$controller.$unit";
}
- $device .= ",drive=drive-$drive_id,id=$drive_id";
+ $device .= ",id=$drive_id";
+ $device .= ",drive=drive-$drive_id" if $device_type ne 'cd' || $drive->{file} ne 'none';
if ($device_type eq 'hd') {
if (my $model = $drive->{model}) {
@@ -1490,6 +1491,13 @@ sub print_drivedevice_full {
$device .= ",serial=$serial";
}
+ my $writecache = $drive->{cache} && $drive->{cache} =~ /^(?:none|writeback|unsafe)$/ ? "on" : "off";
+ $device .= ",write-cache=$writecache" if $drive->{media} && $drive->{media} ne 'cdrom';
+
+ my @qemu_drive_options = qw(heads secs cyls trans rerror werror);
+ foreach my $o (@qemu_drive_options) {
+ $device .= ",$o=$drive->{$o}" if defined($drive->{$o});
+ }
return $device;
}
@@ -1539,145 +1547,256 @@ my sub drive_uses_cache_direct {
return $cache_direct;
}
-sub print_drive_commandline_full {
- my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;
+sub print_drive_throttle_group {
+ my ($drive) = @_;
+ #command line can't use the structured json limits option,
+ #so limit params need to use with x- as it's unstable api
+ return if drive_is_cdrom($drive) && $drive->{file} eq 'none';
- my $path;
- my $volid = $drive->{file};
my $drive_id = get_drive_id($drive);
+ my $throttle_group = "throttle-group,id=throttle-drive-$drive_id";
+ foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
+ my ($dir, $qmpname) = @$type;
+
+ if (my $v = $drive->{"mbps$dir"}) {
+ $throttle_group .= ",x-bps$qmpname=".int($v*1024*1024);
+ }
+ if (my $v = $drive->{"mbps${dir}_max"}) {
+ $throttle_group .= ",x-bps$qmpname-max=".int($v*1024*1024);
+ }
+ if (my $v = $drive->{"bps${dir}_max_length"}) {
+ $throttle_group .= ",x-bps$qmpname-max-length=$v";
+ }
+ if (my $v = $drive->{"iops${dir}"}) {
+ $throttle_group .= ",x-iops$qmpname=$v";
+ }
+ if (my $v = $drive->{"iops${dir}_max"}) {
+ $throttle_group .= ",x-iops$qmpname-max=$v";
+ }
+ if (my $v = $drive->{"iops${dir}_max_length"}) {
+ $throttle_group .= ",x-iops$qmpname-max-length=$v";
+ }
+ }
+
+ return $throttle_group;
+}
+
+sub generate_file_blockdev {
+ my ($storecfg, $drive, $nodename) = @_;
+
+ my $volid = $drive->{file};
my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
- my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
- if (drive_is_cdrom($drive)) {
- $path = get_iso_path($storecfg, $vmid, $volid);
- die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
+ my $scfg = undef;
+ my $path = $volid;
+ if($storeid && $storeid ne 'nbd') {
+ $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $path = PVE::Storage::path($storecfg, $volid);
+ }
+
+ my $blockdev = {};
+
+ if ($path =~ m/^rbd:(\S+)$/) {
+
+ $blockdev->{driver} = 'rbd';
+
+ my @rbd_options = split(/:/, $1);
+ my $keyring = undef;
+ for my $option (@rbd_options) {
+ if ($option =~ m/^(\S+)=(\S+)$/) {
+ my $key = $1;
+ my $value = $2;
+ $blockdev->{'auth-client-required'} = [$value] if $key eq 'auth_supported';
+ $blockdev->{'conf'} = $value if $key eq 'conf';
+ $blockdev->{'user'} = $value if $key eq 'id';
+ $keyring = $value if $key eq 'keyring';
+ if ($key eq 'mon_host') {
+ my $server = [];
+ my @mons = split(';', $value);
+ for my $mon (@mons) {
+ my ($host, $port) = PVE::Tools::parse_host_and_port($mon);
+ $port = '3300' if !$port;
+ push @$server, { host => $host, port => $port };
+ }
+ $blockdev->{server} = $server;
+ }
+ } elsif ($option =~ m|^(\S+)/(\S+)$|){
+ $blockdev->{pool} = $1;
+ my $image = $2;
+
+ if($image =~ m|^(\S+)/(\S+)$|) {
+ $blockdev->{namespace} = $1;
+ $blockdev->{image} = $2;
+ } else {
+ $blockdev->{image} = $image;
+ }
+ }
+ }
+
+ if($keyring && $blockdev->{server}) {
+ #qemu devs are removed passing arbitrary values to blockdev object, and don't have added
+ #keyring to the list of allowed keys. It need to be defined in the store ceph.conf.
+ #https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg02676.html
+ #another way could be to simply patch qemu to allow the key
+ my $ceph_conf = "/etc/pve/priv/ceph/${storeid}.conf";
+ $blockdev->{conf} = $ceph_conf;
+ if (!-e $ceph_conf) {
+ my $content = "[global]\nkeyring = $keyring\n";
+ PVE::Tools::file_set_contents($ceph_conf, $content, 0400);
+ }
+ }
+ } elsif ($path =~ m/^nbd:(\S+):(\d+):exportname=(\S+)$/) {
+ my $server = { type => 'inet', host => $1, port => $2 };
+ $blockdev = { driver => 'nbd', server => $server, export => $3 };
+ } elsif ($path =~ m/^nbd:unix:(\S+):exportname=(\S+)$/) {
+ my $server = { type => 'unix', path => $1 };
+ $blockdev = { driver => 'nbd', server => $server, export => $2 };
+ } elsif ($path =~ m|^gluster(\+(tcp\|unix\|rdma))?://(.*)/(.*)/(images/(\S+)/(\S+))$|) {
+ my $protocol = $2 ? $2 : 'inet';
+ $protocol = 'inet' if $protocol eq 'tcp';
+ my $server = [{ type => $protocol, host => $3, port => '24007' }];
+ $blockdev = { driver => 'gluster', server => $server, volume => $4, path => $5 };
+ } elsif ($path =~ m/^\/dev/) {
+ my $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device';
+ $blockdev = { driver => $driver, filename => $path };
+ } elsif ($path =~ m/^\//) {
+ $blockdev = { driver => 'file', filename => $path};
} else {
- if ($storeid) {
- $path = PVE::Storage::path($storecfg, $volid);
- } else {
- $path = $volid;
+ die "unsupported path: $path\n";
+ #fixme
+ #'{"driver":"iscsi","portal":"iscsi.example.com:3260","target":"demo-target","lun":3,"transport":"tcp"}'
+ }
+
+ my $cache_direct = drive_uses_cache_direct($drive, $scfg);
+ my $cache = {};
+ $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
+ $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
+ $blockdev->{cache} = $cache;
+
+ ##aio
+ if($blockdev->{filename}) {
+ $drive->{aio} = 'threads' if drive_is_cdrom($drive);
+ my $aio = $drive->{aio};
+ if (!$aio) {
+ if (storage_allows_io_uring_default($scfg, $cache_direct)) {
+ # io_uring supports all cache modes
+ $aio = "io_uring";
+ } else {
+ # aio native works only with O_DIRECT
+ if($cache_direct) {
+ $aio = "native";
+ } else {
+ $aio = "threads";
+ }
+ }
}
+ $blockdev->{aio} = $aio;
}
- # For PVE-managed volumes, use the format from the storage layer and prevent overrides via the
- # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to avoid auto-detection by
- # QEMU. For the special case 'none' (get_iso_path() returns an empty $path), there should be no
- # format or QEMU won't start.
- my $format;
- if (drive_is_cdrom($drive) && !$path) {
- # no format
- } elsif ($storeid) {
- $format = checked_volume_format($storecfg, $volid);
+ ##discard && detect-zeroes
+ my $discard = 'ignore';
+ if($drive->{discard}) {
+ $discard = $drive->{discard};
+ $discard = 'unmap' if $discard eq 'on';
+ }
+ $blockdev->{discard} = $discard if !drive_is_cdrom($drive);
- if ($drive->{format} && $drive->{format} ne $format) {
- die "drive '$drive->{interface}$drive->{index}' - volume '$volid'"
- ." - 'format=$drive->{format}' option different from storage format '$format'\n";
- }
+ my $detectzeroes;
+ if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
+ $detectzeroes = 'off';
+ } elsif ($drive->{discard}) {
+ $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
} else {
- $format = $drive->{format} // 'raw';
+ # This used to be our default with discard not being specified:
+ $detectzeroes = 'on';
}
+ $blockdev->{'detect-zeroes'} = $detectzeroes if !drive_is_cdrom($drive);
+ $blockdev->{'node-name'} = $nodename if $nodename;
- my $is_rbd = $path =~ m/^rbd:/;
+ return $blockdev;
+}
- my $opts = '';
- my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
- foreach my $o (@qemu_drive_options) {
- $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
- }
+sub generate_format_blockdev {
+ my ($storecfg, $drive, $nodename, $file, $force_readonly) = @_;
- # snapshot only accepts on|off
- if (defined($drive->{snapshot})) {
- my $v = $drive->{snapshot} ? 'on' : 'off';
- $opts .= ",snapshot=$v";
- }
+ my $volid = $drive->{file};
+ my $scfg = undef;
+ my $path = $volid;
+ my $format = $drive->{format};
+ $format //= "raw";
- if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
- $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
- }
+ my $drive_id = get_drive_id($drive);
- foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
- my ($dir, $qmpname) = @$type;
- if (my $v = $drive->{"mbps$dir"}) {
- $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
- }
- if (my $v = $drive->{"mbps${dir}_max"}) {
- $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
- }
- if (my $v = $drive->{"bps${dir}_max_length"}) {
- $opts .= ",throttling.bps$qmpname-max-length=$v";
- }
- if (my $v = $drive->{"iops${dir}"}) {
- $opts .= ",throttling.iops$qmpname=$v";
- }
- if (my $v = $drive->{"iops${dir}_max"}) {
- $opts .= ",throttling.iops$qmpname-max=$v";
- }
- if (my $v = $drive->{"iops${dir}_max_length"}) {
- $opts .= ",throttling.iops$qmpname-max-length=$v";
- }
+ if ($drive->{zeroinit}) {
+ #fixme how to handle zeroinit ? insert special blockdev filter ?
}
- if ($live_restore_name) {
- $format = "rbd" if $is_rbd;
- die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
- if !$format;
- $opts .= ",format=alloc-track,file.driver=$format";
- } elsif ($format) {
- $opts .= ",format=$format";
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+
+ if($storeid) {
+ $scfg = PVE::Storage::storage_config($storecfg, $storeid);
+ $format = checked_volume_format($storecfg, $volid);
+ $path = PVE::Storage::path($storecfg, $volid);
}
+ my $readonly = defined($drive->{ro}) || $force_readonly ? JSON::true : JSON::false;
+
+ #libvirt define cache option on both format && file
my $cache_direct = drive_uses_cache_direct($drive, $scfg);
+ my $cache = {};
+ $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
+ $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
- $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;
+ my $blockdev = { driver => $format, file => $file, cache => $cache, 'read-only' => $readonly };
+ $blockdev->{'node-name'} = $nodename if $nodename;
- if (!$drive->{aio}) {
- if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
- # io_uring supports all cache modes
- $opts .= ",aio=io_uring";
- } else {
- # aio native works only with O_DIRECT
- if($cache_direct) {
- $opts .= ",aio=native";
- } else {
- $opts .= ",aio=threads";
- }
- }
- }
+ return $blockdev;
- if (!drive_is_cdrom($drive)) {
- my $detectzeroes;
- if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
- $detectzeroes = 'off';
- } elsif ($drive->{discard}) {
- $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
- } else {
- # This used to be our default with discard not being specified:
- $detectzeroes = 'on';
- }
+}
- # note: 'detect-zeroes' works per blockdev and we want it to persist
- # after the alloc-track is removed, so put it on 'file' directly
- my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
- $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
- }
+sub generate_drive_blockdev {
+ my ($storecfg, $vmid, $drive, $force_readonly, $live_restore_name) = @_;
- if ($live_restore_name) {
- $opts .= ",backing=$live_restore_name";
- $opts .= ",auto-remove=on";
+ my $path;
+ my $volid = $drive->{file};
+ my $format = $drive->{format};
+ my $drive_id = get_drive_id($drive);
+
+ my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+ my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
+
+ my $blockdevs = [];
+
+ if (drive_is_cdrom($drive)) {
+ die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
+
+ $path = get_iso_path($storecfg, $vmid, $volid);
+ return if !$path;
+ $force_readonly = 1;
}
- # my $file_param = $live_restore_name ? "file.file.filename" : "file";
- my $file_param = "file";
+ my $file_nodename = "file-drive-$drive_id";
+ my $blockdev_file = generate_file_blockdev($storecfg, $drive, $file_nodename);
+ my $fmt_nodename = "fmt-drive-$drive_id";
+ my $blockdev_format = generate_format_blockdev($storecfg, $drive, $fmt_nodename, $blockdev_file, $force_readonly);
+
+ my $blockdev_live_restore = undef;
if ($live_restore_name) {
- # non-rbd drivers require the underlying file to be a separate block
- # node, so add a second .file indirection
- $file_param .= ".file" if !$is_rbd;
- $file_param .= ".filename";
+ die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
+ if !$format;
+
+ $blockdev_live_restore = { 'node-name' => "liverestore-drive-$drive_id",
+ backing => $live_restore_name,
+ 'auto-remove' => 'on', format => "alloc-track",
+ file => $blockdev_format };
}
- my $pathinfo = $path ? "$file_param=$path," : '';
- return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
+ #this is the topfilter entry point, use $drive-drive_id as nodename
+ my $blockdev_throttle = { driver => "throttle", 'node-name' => "drive-$drive_id", 'throttle-group' => "throttle-drive-$drive_id" };
+ #put liverestore filter between throttle && format filter
+ $blockdev_throttle->{file} = $live_restore_name ? $blockdev_live_restore : $blockdev_format;
+ return $blockdev_throttle,
}
sub print_pbs_blockdev {
@@ -4091,13 +4210,13 @@ sub config_to_command {
push @$devices, '-blockdev', $live_restore->{blockdev};
}
- my $drive_cmd = print_drive_commandline_full(
- $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
-
- # extra protection for templates, but SATA and IDE don't support it..
- $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
+ my $throttle_group = print_drive_throttle_group($drive);
+ push @$devices, '-object', $throttle_group if $throttle_group;
- push @$devices, '-drive',$drive_cmd;
+# # extra protection for templates, but SATA and IDE don't support it..
+ my $force_readonly = drive_is_read_only($conf, $drive);
+ my $blockdev = generate_drive_blockdev($storecfg, $vmid, $drive, $force_readonly, $live_blockdev_name);
+ push @$devices, '-blockdev', encode_json_ordered($blockdev) if $blockdev;
push @$devices, '-device', print_drivedevice_full(
$storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
});
@@ -8986,4 +9105,8 @@ sub delete_ifaces_ipams_ips {
}
}
+sub encode_json_ordered {
+ return JSON->new->canonical->allow_nonref->encode( $_[0] );
+}
+
1;
--
2.39.5
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2024-12-16 9:12 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com>
2024-12-16 9:12 ` [pve-devel] [PATCH v1 pve-qemu 1/1] add block-commit-replaces option patch Alexandre Derumier via pve-devel
2024-12-16 9:12 ` Alexandre Derumier via pve-devel [this message]
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 1/3] qcow2: add external snapshot support Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 02/11] blockdev: fix cfg2cmd tests Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 2/3] lvmplugin: add qcow2 snapshot Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 03/11] blockdev : convert qemu_driveadd && qemu_drivedel Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 pve-storage 3/3] storage: vdisk_free: remove external snapshots Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 04/11] blockdev: vm_devices_list : fix block-query Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 05/11] blockdev: convert cdrom media eject/insert Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 06/11] blockdev: block_resize: convert to blockdev Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 07/11] blockdev: nbd_export: block-export-add : use drive-$id for nodename Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 08/11] blockdev: convert drive_mirror to blockdev_mirror Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 09/11] blockdev: mirror: change aio on target if io_uring is not default Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 10/11] blockdev: add backing_chain support Alexandre Derumier via pve-devel
2024-12-16 9:12 ` [pve-devel] [PATCH v3 qemu-server 11/11] qcow2: add external snapshot support Alexandre Derumier via pve-devel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mailman.201.1734340361.332.pve-devel@lists.proxmox.com \
--to=pve-devel@lists.proxmox.com \
--cc=alexandre.derumier@groupe-cyllene.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox