From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 18A301FF15C for ; Wed, 8 Jan 2025 15:18:13 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 79DE91B827; Wed, 8 Jan 2025 15:17:59 +0100 (CET) Date: Wed, 8 Jan 2025 15:17:23 +0100 (CET) From: =?UTF-8?Q?Fabian_Gr=C3=BCnbichler?= To: Proxmox VE development discussion Message-ID: <1787399718.8060.1736345843084@webmail.proxmox.com> In-Reply-To: References: <20241216091229.3142660-1-alexandre.derumier@groupe-cyllene.com> MIME-Version: 1.0 X-Priority: 3 Importance: Normal X-Mailer: Open-Xchange Mailer v7.10.6-Rev71 X-Originating-Client: open-xchange-appsuite X-SPAM-LEVEL: Spam detection results: 0 AWL 0.045 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to Validity was blocked. See https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more information. SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. [qemuserver.pm, gnu.org] WEIRD_PORT 0.001 Uses non-standard port number for HTTP Subject: Re: [pve-devel] [PATCH v3 qemu-server 01/11] blockdev: cmdline: convert drive to blockdev syntax X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" > Alexandre Derumier via pve-devel hat am 16.12.2024 10:12 CET geschrieben: > The blockdev chain is: > -throttle-group-node (drive-(ide|scsi|virtio)x) > - format-node (fmt-drive-x) > - file-node (file-drive -x) > > fixme: implement iscsi:// path > Signed-off-by: Alexandre Derumier > --- > PVE/QemuServer.pm | 351 +++++++++++++++++++++++++++++++--------------- > 1 file changed, 237 insertions(+), 114 deletions(-) > > diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm > index 8192599a..2832ed09 100644 > --- a/PVE/QemuServer.pm > +++ b/PVE/QemuServer.pm > @@ -1464,7 +1464,8 @@ sub print_drivedevice_full { > } else { > $device .= ",bus=ahci$controller.$unit"; > } > - $device .= ",drive=drive-$drive_id,id=$drive_id"; > + $device .= ",id=$drive_id"; > + $device .= ",drive=drive-$drive_id" if $device_type ne 'cd' || $drive->{file} ne 'none'; is this just because you remove the whole drive when ejecting? not sure whether that is really needed.. > > if ($device_type eq 'hd') { > if (my $model = $drive->{model}) { > @@ -1490,6 +1491,13 @@ sub print_drivedevice_full { > $device .= ",serial=$serial"; > } > > + my $writecache = $drive->{cache} && $drive->{cache} =~ /^(?:none|writeback|unsafe)$/ ? "on" : "off"; > + $device .= ",write-cache=$writecache" if $drive->{media} && $drive->{media} ne 'cdrom'; > + > + my @qemu_drive_options = qw(heads secs cyls trans rerror werror); > + foreach my $o (@qemu_drive_options) { > + $device .= ",$o=$drive->{$o}" if defined($drive->{$o}); > + } > > return $device; > } > @@ -1539,145 +1547,256 @@ my sub drive_uses_cache_direct { > return $cache_direct; > } > > -sub print_drive_commandline_full { > - my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_; > +sub print_drive_throttle_group { > + my ($drive) = @_; > + #command line can't use the structured json limits option, > + #so limit params need to use with x- as it's unstable api this comment should be below the early return, or above the whole sub. > + return if drive_is_cdrom($drive) && $drive->{file} eq 'none'; is this needed if we keep empty cdrom drives around like before? I know throttling practically makes no sense in that case, but it might make the code in general more simple? > > - my $path; > - my $volid = $drive->{file}; > my $drive_id = get_drive_id($drive); > > + my $throttle_group = "throttle-group,id=throttle-drive-$drive_id"; > + foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) { > + my ($dir, $qmpname) = @$type; > + > + if (my $v = $drive->{"mbps$dir"}) { > + $throttle_group .= ",x-bps$qmpname=".int($v*1024*1024); > + } > + if (my $v = $drive->{"mbps${dir}_max"}) { > + $throttle_group .= ",x-bps$qmpname-max=".int($v*1024*1024); > + } > + if (my $v = $drive->{"bps${dir}_max_length"}) { > + $throttle_group .= ",x-bps$qmpname-max-length=$v"; > + } > + if (my $v = $drive->{"iops${dir}"}) { > + $throttle_group .= ",x-iops$qmpname=$v"; > + } > + if (my $v = $drive->{"iops${dir}_max"}) { > + $throttle_group .= ",x-iops$qmpname-max=$v"; > + } > + if (my $v = $drive->{"iops${dir}_max_length"}) { > + $throttle_group .= ",x-iops$qmpname-max-length=$v"; > + } > + } > + > + return $throttle_group; > +} > + > +sub generate_file_blockdev { > + my ($storecfg, $drive, $nodename) = @_; > + > + my $volid = $drive->{file}; > my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1); > - my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef; > > - if (drive_is_cdrom($drive)) { > - $path = get_iso_path($storecfg, $vmid, $volid); > - die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name; > + my $scfg = undef; > + my $path = $volid; I think this should only happen if the parse_volume_id above told us this is an absolute path and not a PVE-managed volume.. > + if($storeid && $storeid ne 'nbd') { this is wrong.. I guess it's also somewhat wrong in the old qemu_drive_mirror code.. we should probably check using a more specific RE that the "volid" is an NBD URI, and not attempt to parse it as a regular volid in that case.. > + $scfg = PVE::Storage::storage_config($storecfg, $storeid); > + $path = PVE::Storage::path($storecfg, $volid); > + } > + > + my $blockdev = {}; > + > + if ($path =~ m/^rbd:(\S+)$/) { > + > + $blockdev->{driver} = 'rbd'; > + > + my @rbd_options = split(/:/, $1); > + my $keyring = undef; > + for my $option (@rbd_options) { > + if ($option =~ m/^(\S+)=(\S+)$/) { > + my $key = $1; > + my $value = $2; > + $blockdev->{'auth-client-required'} = [$value] if $key eq 'auth_supported'; > + $blockdev->{'conf'} = $value if $key eq 'conf'; > + $blockdev->{'user'} = $value if $key eq 'id'; > + $keyring = $value if $key eq 'keyring'; > + if ($key eq 'mon_host') { > + my $server = []; > + my @mons = split(';', $value); > + for my $mon (@mons) { > + my ($host, $port) = PVE::Tools::parse_host_and_port($mon); > + $port = '3300' if !$port; > + push @$server, { host => $host, port => $port }; > + } > + $blockdev->{server} = $server; > + } > + } elsif ($option =~ m|^(\S+)/(\S+)$|){ > + $blockdev->{pool} = $1; > + my $image = $2; > + > + if($image =~ m|^(\S+)/(\S+)$|) { > + $blockdev->{namespace} = $1; > + $blockdev->{image} = $2; > + } else { > + $blockdev->{image} = $image; > + } > + } > + } > + > + if($keyring && $blockdev->{server}) { > + #qemu devs are removed passing arbitrary values to blockdev object, and don't have added > + #keyring to the list of allowed keys. It need to be defined in the store ceph.conf. > + #https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg02676.html > + #another way could be to simply patch qemu to allow the key I think we either want to allow the keys we need in Qemu (and upstream that), or we want to write the config out to a temporary config and clean that up after Qemu has read its contents.. > + my $ceph_conf = "/etc/pve/priv/ceph/${storeid}.conf"; this file is already taken for external Ceph clusters, we can't just re-use for this purpose without a lot of side effects I think.. > + $blockdev->{conf} = $ceph_conf; > + if (!-e $ceph_conf) { > + my $content = "[global]\nkeyring = $keyring\n"; > + PVE::Tools::file_set_contents($ceph_conf, $content, 0400); > + } > + } > + } elsif ($path =~ m/^nbd:(\S+):(\d+):exportname=(\S+)$/) { > + my $server = { type => 'inet', host => $1, port => $2 }; > + $blockdev = { driver => 'nbd', server => $server, export => $3 }; > + } elsif ($path =~ m/^nbd:unix:(\S+):exportname=(\S+)$/) { > + my $server = { type => 'unix', path => $1 }; > + $blockdev = { driver => 'nbd', server => $server, export => $2 }; > + } elsif ($path =~ m|^gluster(\+(tcp\|unix\|rdma))?://(.*)/(.*)/(images/(\S+)/(\S+))$|) { > + my $protocol = $2 ? $2 : 'inet'; > + $protocol = 'inet' if $protocol eq 'tcp'; > + my $server = [{ type => $protocol, host => $3, port => '24007' }]; > + $blockdev = { driver => 'gluster', server => $server, volume => $4, path => $5 }; > + } elsif ($path =~ m/^\/dev/) { > + my $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device'; > + $blockdev = { driver => $driver, filename => $path }; > + } elsif ($path =~ m/^\//) { > + $blockdev = { driver => 'file', filename => $path}; > } else { > - if ($storeid) { > - $path = PVE::Storage::path($storecfg, $volid); > - } else { > - $path = $volid; > + die "unsupported path: $path\n"; > + #fixme > + #'{"driver":"iscsi","portal":"iscsi.example.com:3260","target":"demo-target","lun":3,"transport":"tcp"}' > + } > + > + my $cache_direct = drive_uses_cache_direct($drive, $scfg); > + my $cache = {}; > + $cache->{direct} = $cache_direct ? JSON::true : JSON::false; > + $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false; > + $blockdev->{cache} = $cache; > + > + ##aio > + if($blockdev->{filename}) { > + $drive->{aio} = 'threads' if drive_is_cdrom($drive); > + my $aio = $drive->{aio}; > + if (!$aio) { > + if (storage_allows_io_uring_default($scfg, $cache_direct)) { > + # io_uring supports all cache modes > + $aio = "io_uring"; > + } else { > + # aio native works only with O_DIRECT > + if($cache_direct) { > + $aio = "native"; > + } else { > + $aio = "threads"; > + } > + } > } > + $blockdev->{aio} = $aio; > } > > - # For PVE-managed volumes, use the format from the storage layer and prevent overrides via the > - # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to avoid auto-detection by > - # QEMU. For the special case 'none' (get_iso_path() returns an empty $path), there should be no > - # format or QEMU won't start. > - my $format; > - if (drive_is_cdrom($drive) && !$path) { > - # no format > - } elsif ($storeid) { > - $format = checked_volume_format($storecfg, $volid); > + ##discard && detect-zeroes > + my $discard = 'ignore'; > + if($drive->{discard}) { > + $discard = $drive->{discard}; > + $discard = 'unmap' if $discard eq 'on'; > + } > + $blockdev->{discard} = $discard if !drive_is_cdrom($drive); > > - if ($drive->{format} && $drive->{format} ne $format) { > - die "drive '$drive->{interface}$drive->{index}' - volume '$volid'" > - ." - 'format=$drive->{format}' option different from storage format '$format'\n"; > - } > + my $detectzeroes; nit: detect_zeroes > + if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) { > + $detectzeroes = 'off'; > + } elsif ($drive->{discard}) { > + $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on'; > } else { > - $format = $drive->{format} // 'raw'; > + # This used to be our default with discard not being specified: > + $detectzeroes = 'on'; > } > + $blockdev->{'detect-zeroes'} = $detectzeroes if !drive_is_cdrom($drive); > + $blockdev->{'node-name'} = $nodename if $nodename; this last line could be a lot higher up? > > - my $is_rbd = $path =~ m/^rbd:/; > + return $blockdev; > +} > > - my $opts = ''; > - my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard); > - foreach my $o (@qemu_drive_options) { > - $opts .= ",$o=$drive->{$o}" if defined($drive->{$o}); > - } > +sub generate_format_blockdev { > + my ($storecfg, $drive, $nodename, $file, $force_readonly) = @_; > > - # snapshot only accepts on|off > - if (defined($drive->{snapshot})) { > - my $v = $drive->{snapshot} ? 'on' : 'off'; > - $opts .= ",snapshot=$v"; > - } > + my $volid = $drive->{file}; > + my $scfg = undef; > + my $path = $volid; path is not used at all, other than being conditionally overwritten below.. > + my $format = $drive->{format}; > + $format //= "raw"; the format handling here is very sensitive, and I think this broke it. see the big comment this patch removed ;) short summary: for PVE-managed volumes we want the format from the storage layer (via checked_volume_format). if the drive has a format set that disagrees, that is a hard error. for absolute paths we us the format from the drive with a fallback to raw. > > - if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only > - $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off'); > - } > + my $drive_id = get_drive_id($drive); > > - foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) { > - my ($dir, $qmpname) = @$type; > - if (my $v = $drive->{"mbps$dir"}) { > - $opts .= ",throttling.bps$qmpname=".int($v*1024*1024); > - } > - if (my $v = $drive->{"mbps${dir}_max"}) { > - $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024); > - } > - if (my $v = $drive->{"bps${dir}_max_length"}) { > - $opts .= ",throttling.bps$qmpname-max-length=$v"; > - } > - if (my $v = $drive->{"iops${dir}"}) { > - $opts .= ",throttling.iops$qmpname=$v"; > - } > - if (my $v = $drive->{"iops${dir}_max"}) { > - $opts .= ",throttling.iops$qmpname-max=$v"; > - } > - if (my $v = $drive->{"iops${dir}_max_length"}) { > - $opts .= ",throttling.iops$qmpname-max-length=$v"; > - } > + if ($drive->{zeroinit}) { > + #fixme how to handle zeroinit ? insert special blockdev filter ? > } > > - if ($live_restore_name) { > - $format = "rbd" if $is_rbd; > - die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n" > - if !$format; > - $opts .= ",format=alloc-track,file.driver=$format"; > - } elsif ($format) { > - $opts .= ",format=$format"; > + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1); so I guess this should never be called with nbd-URI-volids? nit: $volname is not used anywhere, so can be removed.. > + > + if($storeid) { > + $scfg = PVE::Storage::storage_config($storecfg, $storeid); > + $format = checked_volume_format($storecfg, $volid); this is missing the comparison against $drive->{format} > + $path = PVE::Storage::path($storecfg, $volid); this is not used anywhere.. > } > > + my $readonly = defined($drive->{ro}) || $force_readonly ? JSON::true : JSON::false; > + > + #libvirt define cache option on both format && file > my $cache_direct = drive_uses_cache_direct($drive, $scfg); > + my $cache = {}; > + $cache->{direct} = $cache_direct ? JSON::true : JSON::false; > + $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false; so we have the same code in two places? should probably be a helper then to not have them go out of sync.. > > - $opts .= ",cache=none" if !$drive->{cache} && $cache_direct; > + my $blockdev = { driver => $format, file => $file, cache => $cache, 'read-only' => $readonly }; > + $blockdev->{'node-name'} = $nodename if $nodename; > > - if (!$drive->{aio}) { > - if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) { > - # io_uring supports all cache modes > - $opts .= ",aio=io_uring"; > - } else { > - # aio native works only with O_DIRECT > - if($cache_direct) { > - $opts .= ",aio=native"; > - } else { > - $opts .= ",aio=threads"; > - } > - } > - } > + return $blockdev; > > - if (!drive_is_cdrom($drive)) { > - my $detectzeroes; > - if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) { > - $detectzeroes = 'off'; > - } elsif ($drive->{discard}) { > - $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on'; > - } else { > - # This used to be our default with discard not being specified: > - $detectzeroes = 'on'; > - } > +} > > - # note: 'detect-zeroes' works per blockdev and we want it to persist > - # after the alloc-track is removed, so put it on 'file' directly > - my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes"; > - $opts .= ",$dz_param=$detectzeroes" if $detectzeroes; > - } > +sub generate_drive_blockdev { > + my ($storecfg, $vmid, $drive, $force_readonly, $live_restore_name) = @_; > > - if ($live_restore_name) { > - $opts .= ",backing=$live_restore_name"; > - $opts .= ",auto-remove=on"; > + my $path; > + my $volid = $drive->{file}; > + my $format = $drive->{format}; this is only used once below > + my $drive_id = get_drive_id($drive); > + > + my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1); > + my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef; > + > + my $blockdevs = []; > + > + if (drive_is_cdrom($drive)) { > + die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name; > + > + $path = get_iso_path($storecfg, $vmid, $volid); > + return if !$path; > + $force_readonly = 1; > } > > - # my $file_param = $live_restore_name ? "file.file.filename" : "file"; > - my $file_param = "file"; > + my $file_nodename = "file-drive-$drive_id"; > + my $blockdev_file = generate_file_blockdev($storecfg, $drive, $file_nodename); > + my $fmt_nodename = "fmt-drive-$drive_id"; > + my $blockdev_format = generate_format_blockdev($storecfg, $drive, $fmt_nodename, $blockdev_file, $force_readonly); > + > + my $blockdev_live_restore = undef; > if ($live_restore_name) { > - # non-rbd drivers require the underlying file to be a separate block > - # node, so add a second .file indirection > - $file_param .= ".file" if !$is_rbd; > - $file_param .= ".filename"; > + die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n" > + if !$format; for this check, but it is not actually set anywhere here.. so is something missing or can the check go? > + > + $blockdev_live_restore = { 'node-name' => "liverestore-drive-$drive_id", > + backing => $live_restore_name, > + 'auto-remove' => 'on', format => "alloc-track", > + file => $blockdev_format }; > } > - my $pathinfo = $path ? "$file_param=$path," : ''; > > - return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts"; > + #this is the topfilter entry point, use $drive-drive_id as nodename > + my $blockdev_throttle = { driver => "throttle", 'node-name' => "drive-$drive_id", 'throttle-group' => "throttle-drive-$drive_id" }; > + #put liverestore filter between throttle && format filter > + $blockdev_throttle->{file} = $live_restore_name ? $blockdev_live_restore : $blockdev_format; > + return $blockdev_throttle, > } > > sub print_pbs_blockdev { > @@ -4091,13 +4210,13 @@ sub config_to_command { > push @$devices, '-blockdev', $live_restore->{blockdev}; > } > > - my $drive_cmd = print_drive_commandline_full( > - $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0)); > - > - # extra protection for templates, but SATA and IDE don't support it.. > - $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive); > + my $throttle_group = print_drive_throttle_group($drive); > + push @$devices, '-object', $throttle_group if $throttle_group; > > - push @$devices, '-drive',$drive_cmd; > +# # extra protection for templates, but SATA and IDE don't support it.. > + my $force_readonly = drive_is_read_only($conf, $drive); > + my $blockdev = generate_drive_blockdev($storecfg, $vmid, $drive, $force_readonly, $live_blockdev_name); > + push @$devices, '-blockdev', encode_json_ordered($blockdev) if $blockdev; > push @$devices, '-device', print_drivedevice_full( > $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type); > }); > @@ -8986,4 +9105,8 @@ sub delete_ifaces_ipams_ips { > } > } > > +sub encode_json_ordered { > + return JSON->new->canonical->allow_nonref->encode( $_[0] ); > +} this is only used in a single place.. > + > 1; > -- > 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel