public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Alexandre Derumier via pve-devel <pve-devel@lists.proxmox.com>
To: pve-devel@lists.proxmox.com
Cc: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
Subject: [pve-devel] [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax
Date: Mon, 16 Dec 2024 10:12:16 +0100	[thread overview]
Message-ID: <mailman.201.1734340361.332.pve-devel@lists.proxmox.com> (raw)
In-Reply-To: <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com>

[-- Attachment #1: Type: message/rfc822, Size: 19086 bytes --]

From: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax
Date: Mon, 16 Dec 2024 10:12:16 +0100
Message-ID: <20241216091229.3142660-3-alexandre.derumier@groupe-cyllene.com>

The blockdev chain is:
-throttle-group-node (drive-(ide|scsi|virtio)x)
    - format-node (fmt-drive-x)
         - file-node (file-drive -x)

fixme: implement iscsi:// path
Signed-off-by: Alexandre Derumier <alexandre.derumier@groupe-cyllene.com>
---
 PVE/QemuServer.pm | 351 +++++++++++++++++++++++++++++++---------------
 1 file changed, 237 insertions(+), 114 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 8192599a..2832ed09 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1464,7 +1464,8 @@ sub print_drivedevice_full {
 	} else {
 	    $device .= ",bus=ahci$controller.$unit";
 	}
-	$device .= ",drive=drive-$drive_id,id=$drive_id";
+	$device .= ",id=$drive_id";
+	$device .= ",drive=drive-$drive_id" if $device_type ne 'cd' || $drive->{file} ne 'none';
 
 	if ($device_type eq 'hd') {
 	    if (my $model = $drive->{model}) {
@@ -1490,6 +1491,13 @@ sub print_drivedevice_full {
 	$device .= ",serial=$serial";
     }
 
+    my $writecache = $drive->{cache} && $drive->{cache} =~ /^(?:none|writeback|unsafe)$/  ? "on" : "off";
+    $device .= ",write-cache=$writecache" if $drive->{media} && $drive->{media} ne 'cdrom';
+
+    my @qemu_drive_options = qw(heads secs cyls trans rerror werror);
+    foreach my $o (@qemu_drive_options) {
+       $device .= ",$o=$drive->{$o}" if defined($drive->{$o});
+    }
 
     return $device;
 }
@@ -1539,145 +1547,256 @@ my sub drive_uses_cache_direct {
     return $cache_direct;
 }
 
-sub print_drive_commandline_full {
-    my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;
+sub print_drive_throttle_group {
+    my ($drive) = @_;
+    #command line can't use the structured json limits option,
+    #so limit params need to use with x- as it's unstable api
+    return if drive_is_cdrom($drive) && $drive->{file} eq 'none';
 
-    my $path;
-    my $volid = $drive->{file};
     my $drive_id = get_drive_id($drive);
 
+    my $throttle_group = "throttle-group,id=throttle-drive-$drive_id";
+    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
+        my ($dir, $qmpname) = @$type;
+
+        if (my $v = $drive->{"mbps$dir"}) {
+            $throttle_group .= ",x-bps$qmpname=".int($v*1024*1024);
+        }
+        if (my $v = $drive->{"mbps${dir}_max"}) {
+            $throttle_group .= ",x-bps$qmpname-max=".int($v*1024*1024);
+        }
+        if (my $v = $drive->{"bps${dir}_max_length"}) {
+            $throttle_group .= ",x-bps$qmpname-max-length=$v";
+        }
+        if (my $v = $drive->{"iops${dir}"}) {
+            $throttle_group .= ",x-iops$qmpname=$v";
+        }
+        if (my $v = $drive->{"iops${dir}_max"}) {
+            $throttle_group .= ",x-iops$qmpname-max=$v";
+        }
+        if (my $v = $drive->{"iops${dir}_max_length"}) {
+            $throttle_group .= ",x-iops$qmpname-max-length=$v";
+        }
+   }
+
+   return $throttle_group;
+}
+
+sub generate_file_blockdev {
+    my ($storecfg, $drive, $nodename) = @_;
+
+    my $volid = $drive->{file};
     my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
-    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
 
-    if (drive_is_cdrom($drive)) {
-	$path = get_iso_path($storecfg, $vmid, $volid);
-	die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
+    my $scfg = undef;
+    my $path = $volid;
+    if($storeid && $storeid ne 'nbd') {
+	$scfg = PVE::Storage::storage_config($storecfg, $storeid);
+	$path = PVE::Storage::path($storecfg, $volid);
+    }
+
+    my $blockdev = {};
+
+    if ($path =~ m/^rbd:(\S+)$/) {
+
+        $blockdev->{driver} = 'rbd';
+
+	my @rbd_options = split(/:/, $1);
+	my $keyring = undef;
+	for my $option (@rbd_options) {
+	    if ($option =~ m/^(\S+)=(\S+)$/) {
+		my $key = $1;
+		my $value = $2;
+		$blockdev->{'auth-client-required'} = [$value] if $key eq 'auth_supported';
+		$blockdev->{'conf'} = $value if $key eq 'conf';
+		$blockdev->{'user'} = $value if $key eq 'id';
+		$keyring = $value if $key eq 'keyring';
+	        if ($key eq 'mon_host') {
+		    my $server = [];
+		    my @mons = split(';', $value);
+		    for my $mon (@mons) {
+			my ($host, $port) = PVE::Tools::parse_host_and_port($mon);
+			$port = '3300' if !$port;
+			push @$server, { host => $host, port => $port };
+		    }
+		    $blockdev->{server} = $server;
+		}
+	    } elsif ($option =~ m|^(\S+)/(\S+)$|){
+                $blockdev->{pool} = $1;
+		my $image = $2;
+
+                if($image =~ m|^(\S+)/(\S+)$|) {
+                    $blockdev->{namespace} = $1;
+                    $blockdev->{image} = $2;
+                } else {
+                    $blockdev->{image} = $image;
+                }
+            }
+	}
+
+	if($keyring && $blockdev->{server}) {
+	    #qemu devs are removed passing arbitrary values to blockdev object, and don't have added
+	    #keyring to the list of allowed keys. It need to be defined in the store ceph.conf.
+	    #https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg02676.html
+	    #another way could be to simply patch qemu to allow the key
+	    my $ceph_conf = "/etc/pve/priv/ceph/${storeid}.conf";
+	    $blockdev->{conf} = $ceph_conf;
+	    if (!-e $ceph_conf) {
+		my $content = "[global]\nkeyring = $keyring\n";
+		PVE::Tools::file_set_contents($ceph_conf, $content, 0400);
+	    }
+	}
+    } elsif ($path =~ m/^nbd:(\S+):(\d+):exportname=(\S+)$/) {
+	my $server = { type => 'inet', host => $1, port => $2 };
+	$blockdev = { driver => 'nbd', server => $server, export => $3 };
+    } elsif ($path =~ m/^nbd:unix:(\S+):exportname=(\S+)$/) {
+	my $server = { type => 'unix', path => $1 };
+	$blockdev = { driver => 'nbd', server => $server, export => $2 };
+    } elsif ($path =~ m|^gluster(\+(tcp\|unix\|rdma))?://(.*)/(.*)/(images/(\S+)/(\S+))$|) {
+	my $protocol = $2 ? $2 : 'inet';
+	$protocol = 'inet' if $protocol eq 'tcp';
+	my $server = [{ type => $protocol, host => $3, port => '24007' }];
+	$blockdev = { driver => 'gluster', server => $server, volume => $4, path => $5 };
+    } elsif ($path =~ m/^\/dev/) {
+	my $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device';
+	$blockdev = { driver => $driver, filename => $path };
+    } elsif ($path =~ m/^\//) {
+	$blockdev = { driver => 'file', filename => $path};
     } else {
-	if ($storeid) {
-	    $path = PVE::Storage::path($storecfg, $volid);
-	} else {
-	    $path = $volid;
+	die "unsupported path: $path\n";
+	#fixme
+	#'{"driver":"iscsi","portal":"iscsi.example.com:3260","target":"demo-target","lun":3,"transport":"tcp"}'
+    }
+
+    my $cache_direct = drive_uses_cache_direct($drive, $scfg);
+    my $cache = {};
+    $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
+    $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
+    $blockdev->{cache} = $cache;
+
+    ##aio
+    if($blockdev->{filename}) {
+	$drive->{aio} = 'threads' if drive_is_cdrom($drive);
+	my $aio = $drive->{aio};
+	if (!$aio) {
+	    if (storage_allows_io_uring_default($scfg, $cache_direct)) {
+		# io_uring supports all cache modes
+		$aio = "io_uring";
+	    } else {
+		# aio native works only with O_DIRECT
+		if($cache_direct) {
+		    $aio = "native";
+		} else {
+		    $aio = "threads";
+		}
+	    }
 	}
+	$blockdev->{aio} = $aio;
     }
 
-    # For PVE-managed volumes, use the format from the storage layer and prevent overrides via the
-    # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to avoid auto-detection by
-    # QEMU. For the special case 'none' (get_iso_path() returns an empty $path), there should be no
-    # format or QEMU won't start.
-    my $format;
-    if (drive_is_cdrom($drive) && !$path) {
-	# no format
-    } elsif ($storeid) {
-	$format = checked_volume_format($storecfg, $volid);
+    ##discard && detect-zeroes
+    my $discard = 'ignore';
+    if($drive->{discard}) {
+	$discard = $drive->{discard};
+	$discard = 'unmap' if $discard eq 'on';
+    }
+    $blockdev->{discard} = $discard if !drive_is_cdrom($drive);
 
-	if ($drive->{format} && $drive->{format} ne $format) {
-	    die "drive '$drive->{interface}$drive->{index}' - volume '$volid'"
-		." - 'format=$drive->{format}' option different from storage format '$format'\n";
-	}
+    my $detectzeroes;
+    if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
+	$detectzeroes = 'off';
+    } elsif ($drive->{discard}) {
+	$detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
     } else {
-	$format = $drive->{format} // 'raw';
+	# This used to be our default with discard not being specified:
+	$detectzeroes = 'on';
     }
+    $blockdev->{'detect-zeroes'} = $detectzeroes if !drive_is_cdrom($drive);
+    $blockdev->{'node-name'} = $nodename if $nodename;
 
-    my $is_rbd = $path =~ m/^rbd:/;
+    return $blockdev;
+}
 
-    my $opts = '';
-    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
-    foreach my $o (@qemu_drive_options) {
-	$opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
-    }
+sub generate_format_blockdev {
+    my ($storecfg, $drive, $nodename, $file, $force_readonly) = @_;
 
-    # snapshot only accepts on|off
-    if (defined($drive->{snapshot})) {
-	my $v = $drive->{snapshot} ? 'on' : 'off';
-	$opts .= ",snapshot=$v";
-    }
+    my $volid = $drive->{file};
+    my $scfg = undef;
+    my $path = $volid;
+    my $format = $drive->{format};
+    $format //= "raw";
 
-    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
-	$opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
-    }
+    my $drive_id = get_drive_id($drive);
 
-    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
-	my ($dir, $qmpname) = @$type;
-	if (my $v = $drive->{"mbps$dir"}) {
-	    $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
-	}
-	if (my $v = $drive->{"mbps${dir}_max"}) {
-	    $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
-	}
-	if (my $v = $drive->{"bps${dir}_max_length"}) {
-	    $opts .= ",throttling.bps$qmpname-max-length=$v";
-	}
-	if (my $v = $drive->{"iops${dir}"}) {
-	    $opts .= ",throttling.iops$qmpname=$v";
-	}
-	if (my $v = $drive->{"iops${dir}_max"}) {
-	    $opts .= ",throttling.iops$qmpname-max=$v";
-	}
-	if (my $v = $drive->{"iops${dir}_max_length"}) {
-	    $opts .= ",throttling.iops$qmpname-max-length=$v";
-	}
+    if ($drive->{zeroinit}) {
+	#fixme how to handle zeroinit ? insert special blockdev filter ?
     }
 
-    if ($live_restore_name) {
-	$format = "rbd" if $is_rbd;
-	die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
-	    if !$format;
-	$opts .= ",format=alloc-track,file.driver=$format";
-    } elsif ($format) {
-	$opts .= ",format=$format";
+    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+
+    if($storeid) {
+	$scfg = PVE::Storage::storage_config($storecfg, $storeid);
+        $format = checked_volume_format($storecfg, $volid);
+	$path = PVE::Storage::path($storecfg, $volid);
     }
 
+    my $readonly = defined($drive->{ro}) || $force_readonly ? JSON::true : JSON::false;
+
+    #libvirt define cache option on both format && file
     my $cache_direct = drive_uses_cache_direct($drive, $scfg);
+    my $cache = {};
+    $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
+    $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
 
-    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;
+    my $blockdev = { driver => $format, file => $file, cache => $cache, 'read-only' => $readonly };
+    $blockdev->{'node-name'} = $nodename if $nodename;
 
-    if (!$drive->{aio}) {
-	if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
-	    # io_uring supports all cache modes
-	    $opts .= ",aio=io_uring";
-	} else {
-	    # aio native works only with O_DIRECT
-	    if($cache_direct) {
-		$opts .= ",aio=native";
-	    } else {
-		$opts .= ",aio=threads";
-	    }
-	}
-    }
+    return $blockdev;
 
-    if (!drive_is_cdrom($drive)) {
-	my $detectzeroes;
-	if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
-	    $detectzeroes = 'off';
-	} elsif ($drive->{discard}) {
-	    $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
-	} else {
-	    # This used to be our default with discard not being specified:
-	    $detectzeroes = 'on';
-	}
+}
 
-	# note: 'detect-zeroes' works per blockdev and we want it to persist
-	# after the alloc-track is removed, so put it on 'file' directly
-	my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
-	$opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
-    }
+sub generate_drive_blockdev {
+    my ($storecfg, $vmid, $drive, $force_readonly, $live_restore_name) = @_;
 
-    if ($live_restore_name) {
-	$opts .= ",backing=$live_restore_name";
-	$opts .= ",auto-remove=on";
+    my $path;
+    my $volid = $drive->{file};
+    my $format = $drive->{format};
+    my $drive_id = get_drive_id($drive);
+
+    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
+    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;
+
+    my $blockdevs = [];
+
+    if (drive_is_cdrom($drive)) {
+        die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
+
+	$path = get_iso_path($storecfg, $vmid, $volid);
+	return if !$path;
+	$force_readonly = 1;
     }
 
-    # my $file_param = $live_restore_name ? "file.file.filename" : "file";
-    my $file_param = "file";
+    my $file_nodename = "file-drive-$drive_id";
+    my $blockdev_file = generate_file_blockdev($storecfg, $drive, $file_nodename);
+    my $fmt_nodename = "fmt-drive-$drive_id";
+    my $blockdev_format = generate_format_blockdev($storecfg, $drive, $fmt_nodename, $blockdev_file, $force_readonly);
+
+    my $blockdev_live_restore = undef;
     if ($live_restore_name) {
-	# non-rbd drivers require the underlying file to be a separate block
-	# node, so add a second .file indirection
-	$file_param .= ".file" if !$is_rbd;
-	$file_param .= ".filename";
+        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
+            if !$format;
+
+        $blockdev_live_restore = { 'node-name' => "liverestore-drive-$drive_id",
+				    backing => $live_restore_name,
+				    'auto-remove' => 'on', format => "alloc-track",
+				    file => $blockdev_format };
     }
-    my $pathinfo = $path ? "$file_param=$path," : '';
 
-    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
+    #this is the topfilter entry point, use $drive-drive_id as nodename
+    my $blockdev_throttle = { driver => "throttle", 'node-name' => "drive-$drive_id", 'throttle-group' => "throttle-drive-$drive_id" };
+    #put liverestore filter between throttle && format filter
+    $blockdev_throttle->{file} = $live_restore_name ? $blockdev_live_restore : $blockdev_format;
+    return $blockdev_throttle,
 }
 
 sub print_pbs_blockdev {
@@ -4091,13 +4210,13 @@ sub config_to_command {
 	    push @$devices, '-blockdev', $live_restore->{blockdev};
 	}
 
-	my $drive_cmd = print_drive_commandline_full(
-	    $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
-
-	# extra protection for templates, but SATA and IDE don't support it..
-	$drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
+	my $throttle_group = print_drive_throttle_group($drive);
+	push @$devices, '-object', $throttle_group if $throttle_group;
 
-	push @$devices, '-drive',$drive_cmd;
+#	# extra protection for templates, but SATA and IDE don't support it..
+	my $force_readonly = drive_is_read_only($conf, $drive);
+	my $blockdev = generate_drive_blockdev($storecfg, $vmid, $drive, $force_readonly, $live_blockdev_name);
+	push @$devices, '-blockdev', encode_json_ordered($blockdev) if $blockdev;
 	push @$devices, '-device', print_drivedevice_full(
 	    $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
     });
@@ -8986,4 +9105,8 @@ sub delete_ifaces_ipams_ips {
     }
 }
 
+sub encode_json_ordered {
+   return JSON->new->canonical->allow_nonref->encode( $_[0] );
+}
+
 1;
-- 
2.39.5



[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

  parent reply	other threads:[~2024-12-16  9:12 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com>
2024-12-16  9:12 ` [pve-devel] [PATCH v1 pve-qemu 1/1] add block-commit-replaces option patch Alexandre Derumier via pve-devel
2024-12-16  9:12 ` Alexandre Derumier via pve-devel [this message]
2024-12-16  9:12 ` [pve-devel] [PATCH v3 pve-storage 1/3] qcow2: add external snapshot support Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 02/11] blockdev: fix cfg2cmd tests Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 pve-storage 2/3] lvmplugin: add qcow2 snapshot Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 03/11] blockdev : convert qemu_driveadd && qemu_drivedel Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 pve-storage 3/3] storage: vdisk_free: remove external snapshots Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 04/11] blockdev: vm_devices_list : fix block-query Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 05/11] blockdev: convert cdrom media eject/insert Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 06/11] blockdev: block_resize: convert to blockdev Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 07/11] blockdev: nbd_export: block-export-add : use drive-$id for nodename Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 08/11] blockdev: convert drive_mirror to blockdev_mirror Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 09/11] blockdev: mirror: change aio on target if io_uring is not default Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 10/11] blockdev: add backing_chain support Alexandre Derumier via pve-devel
2024-12-16  9:12 ` [pve-devel] [PATCH v3 qemu-server 11/11] qcow2: add external snapshot support Alexandre Derumier via pve-devel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=mailman.201.1734340361.332.pve-devel@lists.proxmox.com \
    --to=pve-devel@lists.proxmox.com \
    --cc=alexandre.derumier@groupe-cyllene.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal