From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 7D1B61FF16F for ; Fri, 15 Nov 2024 15:01:42 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 83AAC15710; Fri, 15 Nov 2024 15:01:32 +0100 (CET) From: Markus Frank To: pve-devel@lists.proxmox.com Date: Fri, 15 Nov 2024 15:00:30 +0100 Message-Id: <20241115140037.191682-6-m.frank@proxmox.com> X-Mailer: git-send-email 2.39.5 In-Reply-To: <20241115140037.191682-1-m.frank@proxmox.com> References: <20241115140037.191682-1-m.frank@proxmox.com> MIME-Version: 1.0 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.022 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: [pve-devel] [PATCH qemu-server v12 5/12] fix #1027: virtio-fs support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" add support for sharing directories with a guest vm. virtio-fs needs virtiofsd to be started. In order to start virtiofsd as a process (despite being a daemon, it does not run in the background), a double-fork is used. virtiofsd should close itself together with QEMU. It takes the parameter dirid and the optional parameters direct-io, cache and writeback. 
Additionally, the xattr & acl parameters override the directory mapping settings for xattr & acl. The dirid gets mapped to the path on the current node and is also used as a mount tag (name used to mount the device on the guest). Example config: ``` virtiofs0: foo,direct-io=1,cache=always,acl=1 virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1 ``` For information on the optional parameters see the corresponding doc patch and the official GitLab README: https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md Also add a permission check for virtiofs directory access. Signed-off-by: Markus Frank --- PVE/API2/Qemu.pm | 40 ++++++- PVE/QemuServer.pm | 22 +++- PVE/QemuServer/Makefile | 3 +- PVE/QemuServer/Memory.pm | 23 ++-- PVE/QemuServer/Virtiofs.pm | 227 +++++++++++++++++++++++++++++++++++++ 5 files changed, 304 insertions(+), 11 deletions(-) create mode 100644 PVE/QemuServer/Virtiofs.pm diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm index 1c3cb271..a369a32b 100644 --- a/PVE/API2/Qemu.pm +++ b/PVE/API2/Qemu.pm @@ -37,6 +37,7 @@ use PVE::QemuServer::Memory qw(get_current_memory); use PVE::QemuServer::PCI; use PVE::QemuServer::QMPHelpers; use PVE::QemuServer::USB; +use PVE::QemuServer::Virtiofs; use PVE::QemuMigrate; use PVE::RPCEnvironment; use PVE::AccessControl; @@ -721,6 +722,32 @@ my sub check_vm_create_hostpci_perm { return 1; }; +my sub check_dir_perm { + my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_; + + return 1 if $authuser eq 'root@pam'; + + $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']); + + my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value); + $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']); + + return 1; +}; + +my sub check_vm_create_dir_perm { + my ($rpcenv, $authuser, $vmid, $pool, $param) = @_; + + return 1 if $authuser eq 'root@pam'; + + for my $opt (keys %{$param}) { + next if $opt !~ m/^virtiofs\d+$/; + check_dir_perm($rpcenv, $authuser, $vmid, $pool, 
$opt, $param->{$opt}); + } + + return 1; +}; + my $check_vm_modify_config_perm = sub { my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_; @@ -731,7 +758,7 @@ my $check_vm_modify_config_perm = sub { # else, as there the permission can be value dependend next if PVE::QemuServer::is_valid_drivename($opt); next if $opt eq 'cdrom'; - next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/; + next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/; next if $opt eq 'tags'; @@ -1025,6 +1052,7 @@ __PACKAGE__->register_method({ &$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param); check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param); check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param); + check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param); PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param); &$check_cpu_model_access($rpcenv, $authuser, $param); @@ -1919,6 +1947,10 @@ my $update_vm_api = sub { check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val); PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force); PVE::QemuConfig->write_config($vmid, $conf); + } elsif ($opt =~ m/^virtiofs\d$/) { + check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $val); + PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force); + PVE::QemuConfig->write_config($vmid, $conf); } elsif ($opt eq 'tags') { assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser); delete $conf->{$opt}; @@ -2008,6 +2040,12 @@ my $update_vm_api = sub { } check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt}); $conf->{pending}->{$opt} = $param->{$opt}; + } elsif ($opt =~ m/^virtiofs\d$/) { + if (my $oldvalue = $conf->{$opt}) { + check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue); + } + check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt}); + $conf->{pending}->{$opt} = $param->{$opt}; } elsif ($opt eq 'tags') { assert_tag_permissions($vmid, $conf->{$opt}, 
$param->{$opt}, $rpcenv, $authuser); $conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt}); diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm index cb1e0b82..015d4676 100644 --- a/PVE/QemuServer.pm +++ b/PVE/QemuServer.pm @@ -35,6 +35,7 @@ use PVE::Exception qw(raise raise_param_exc); use PVE::Format qw(render_duration render_bytes); use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne); use PVE::HA::Config; +use PVE::Mapping::Dir; use PVE::Mapping::PCI; use PVE::Mapping::USB; use PVE::INotify; @@ -62,6 +63,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd); use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci); use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel); use PVE::QemuServer::USB; +use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd); my $have_sdn; eval { @@ -957,6 +959,10 @@ my $netdesc = { PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc); +for (my $i = 0; $i < max_virtiofs(); $i++) { + $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs'); +} + my $ipconfig_fmt = { ip => { type => 'string', @@ -3872,8 +3878,11 @@ sub config_to_command { push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough); } + my $virtiofs_enabled = PVE::QemuServer::Virtiofs::virtiofs_enabled($conf); + PVE::QemuServer::Memory::config( - $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd); + $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd, + $machineFlags, $virtiofs_enabled); push @$cmd, '-S' if $conf->{freeze}; @@ -4160,6 +4169,8 @@ sub config_to_command { } } + PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices); + push @$cmd, @$devices; push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags); @@ -5945,6 +5956,8 @@ sub vm_start_nolock { 
PVE::Tools::run_fork sub { PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties); + my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid); + my $tpmpid; if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) { # start the TPM emulator so QEMU can connect on start @@ -5957,8 +5970,10 @@ sub vm_start_nolock { warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n"; kill 'TERM', $tpmpid; } + PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); die "QEMU exited with code $exitcode\n"; } + PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); }; }; @@ -6616,7 +6631,10 @@ sub check_mapping_access { } else { die "either 'host' or 'mapping' must be set.\n"; } - } + } elsif ($opt =~ m/^virtiofs\d$/) { + my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt}); + $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']); + } } }; diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile index ac26e56f..d1bf8bb8 100644 --- a/PVE/QemuServer/Makefile +++ b/PVE/QemuServer/Makefile @@ -11,7 +11,8 @@ SOURCES=PCI.pm \ CPUConfig.pm \ CGroup.pm \ Drive.pm \ - QMPHelpers.pm + QMPHelpers.pm \ + Virtiofs.pm .PHONY: install install: ${SOURCES} diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm index f365f2d1..490ba378 100644 --- a/PVE/QemuServer/Memory.pm +++ b/PVE/QemuServer/Memory.pm @@ -336,7 +336,7 @@ sub qemu_memdevices_list { } sub config { - my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_; + my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags, $virtiofs_enabled) = @_; my $memory = get_current_memory($conf->{memory}); my $static_memory = 0; @@ -379,7 +379,8 @@ sub config { my $numa_memory = $numa->{memory}; $numa_totalmemory += $numa_memory; - my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory); + my $memdev = $virtiofs_enabled ? 
"virtiofs-mem$i" : "ram-node$i"; + my $mem_object = print_mem_object($conf, $memdev, $numa_memory); # cpus my $cpulists = $numa->{cpus}; @@ -404,7 +405,7 @@ sub config { } push @$cmd, '-object', $mem_object; - push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev"; } die "total memory for NUMA nodes must be equal to vm static memory\n" @@ -418,15 +419,21 @@ sub config { die "host NUMA node$i doesn't exist\n" if !host_numanode_exists($i) && $conf->{hugepages}; - my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory); - push @$cmd, '-object', $mem_object; - my $cpus = ($cores * $i); $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1; - push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i"; + my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i"; + my $mem_object = print_mem_object($conf, $memdev, $numa_memory); + push @$cmd, '-object', $mem_object; + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev"; } } + } elsif ($virtiofs_enabled) { + # kvm: '-machine memory-backend' and '-numa memdev' properties are + # mutually exclusive + push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem' + .",size=$conf->{memory}M,share=on"; + push @$machine_flags, 'memory-backend=virtiofs-mem'; } if ($hotplug) { @@ -453,6 +460,8 @@ sub print_mem_object { my $path = hugepages_mount_path($hugepages_size); return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes"; + } elsif ($id =~ m/^virtiofs-mem/) { + return "memory-backend-memfd,id=$id,size=${size}M,share=on"; } else { return "memory-backend-ram,id=$id,size=${size}M"; } diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm new file mode 100644 index 00000000..bdd9fafd --- /dev/null +++ b/PVE/QemuServer/Virtiofs.pm @@ -0,0 +1,227 @@ +package PVE::QemuServer::Virtiofs; + +use strict; +use warnings; + +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); +use 
IO::Socket::UNIX; +use POSIX; +use Socket qw(SOCK_STREAM); + +use PVE::JSONSchema qw(get_standard_option parse_property_string); +use PVE::Mapping::Dir; +use PVE::RESTEnvironment qw(log_warn); + +use base qw(Exporter); + +our @EXPORT_OK = qw( +max_virtiofs +start_all_virtiofsd +); + +my $MAX_VIRTIOFS = 10; +my $socket_path_root = "/run/qemu-server/virtiofsd"; + +my $virtiofs_fmt = { + 'dirid' => { + type => 'string', + default_key => 1, + description => "Mapping identifier of the directory mapping to be shared with the guest." + ." Also used as a mount tag inside the VM.", + format_description => 'mapping-id', + format => 'pve-configid', + }, + 'cache' => { + type => 'string', + description => "The caching policy the file system should use (auto, always, never).", + enum => [qw(auto always never)], + default => "auto", + optional => 1, + }, + 'direct-io' => { + type => 'boolean', + description => "Honor the O_DIRECT flag passed down by guest applications.", + default => 0, + optional => 1, + }, + writeback => { + type => 'boolean', + description => "Enable writeback cache. If enabled, writes may be cached in the guest until" + ." the file is closed or an fsync is performed.", + default => 0, + optional => 1, + }, + xattr => { + type => 'boolean', + description => "Overwrite the xattr option from mapping and explicitly enable/disable" + ." support for extended attributes for the VM.", + default => "use value from mapping", + optional => 1, + }, + acl => { + type => 'boolean', + description => "Overwrite the acl option from mapping and explicitly enable/disable support" + ." 
for posix ACLs (enabled acl implies xattr) for the VM.", + default => "use value from mapping", + optional => 1, + }, +}; +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt); + +my $virtiofsdesc = { + optional => 1, + type => 'string', format => $virtiofs_fmt, + description => "Configuration for sharing a directory between host and guest using Virtio-fs.", +}; +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc); + +sub max_virtiofs { + return $MAX_VIRTIOFS; +} + +sub assert_virtiofs_config { + my ($conf, $virtiofs) = @_; + + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}}; + my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid}); + + my $acl = $virtiofs->{acl} // $dir_cfg->{acl}; + if ($acl && PVE::QemuServer::Helpers::windows_version($conf->{ostype})) { + log_warn( + "Please disable ACLs for virtiofs on Windows VMs, otherwise" + ." the virtiofs shared directory cannot be mounted." + ); + } + + if (!$node_list || scalar($node_list->@*) != 1) { + die "virtiofs needs exactly one mapping for this node\n"; + } + + eval { PVE::Mapping::Dir::assert_valid($node_list->[0]) }; + die "directory mapping invalid: $@\n" if $@; +} + +sub config { + my ($conf, $vmid, $devices) = @_; + + for (my $i = 0; $i < max_virtiofs(); $i++) { + my $opt = "virtiofs$i"; + + next if !$conf->{$opt}; + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); + next if !$virtiofs; + + assert_virtiofs_config($conf, $virtiofs); + + push @$devices, '-chardev', "socket,id=virtiofs$i,path=$socket_path_root/vm$vmid-fs$i"; + + # queue-size is set 1024 because of bug with Windows guests: + # https://bugzilla.redhat.com/show_bug.cgi?id=1873088 + # 1024 is also always used in the virtiofs documentations: + # https://gitlab.com/virtio-fs/virtiofsd#examples + push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024' + .",chardev=virtiofs$i,tag=$virtiofs->{dirid}"; + } +} + +sub virtiofs_enabled { + my 
($conf) = @_; + + my $virtiofs_enabled = 0; + for (my $i = 0; $i < max_virtiofs(); $i++) { + my $opt = "virtiofs$i"; + next if !$conf->{$opt}; + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); + if ($virtiofs) { + $virtiofs_enabled = 1; + last; + } + } + return $virtiofs_enabled; +} + +sub start_all_virtiofsd { + my ($conf, $vmid) = @_; + my $virtiofs_sockets = []; + for (my $i = 0; $i < max_virtiofs(); $i++) { + my $opt = "virtiofs$i"; + + next if !$conf->{$opt}; + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt}); + next if !$virtiofs; + + my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs); + push @$virtiofs_sockets, $virtiofs_socket; + } + return $virtiofs_sockets; +} + +sub start_virtiofsd { + my ($vmid, $fsid, $virtiofs) = @_; + + mkdir $socket_path_root; + my $socket_path = "$socket_path_root/vm$vmid-fs$fsid"; + unlink($socket_path); + my $socket = IO::Socket::UNIX->new( + Type => SOCK_STREAM, + Local => $socket_path, + Listen => 1, + ) or die "cannot create socket - $!\n"; + + my $flags = fcntl($socket, F_GETFD, 0) + or die "failed to get file descriptor flags: $!\n"; + fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC) + or die "failed to remove FD_CLOEXEC from file descriptor\n"; + + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}}; + my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid}); + my $node_cfg = $node_list->[0]; + + my $virtiofsd_bin = '/usr/libexec/virtiofsd'; + my $fd = $socket->fileno(); + my $path = $node_cfg->{path}; + + # Default to dir config xattr & acl settings + my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr}; + my $acl = $virtiofs->{acl} // $dir_cfg->{acl}; + + my $could_not_fork_err = "could not fork to start virtiofsd\n"; + my $pid = fork(); + if ($pid == 0) { + setsid(); + $0 = "task pve-vm$vmid-virtiofs$fsid"; + my $pid2 = fork(); + if ($pid2 == 0) { + my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"]; + push @$cmd, '--xattr' 
if $xattr; + push @$cmd, '--posix-acl' if $acl; + push @$cmd, '--announce-submounts' if ($node_cfg->{submounts}); + push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'}); + push @$cmd, '--cache='.$virtiofs->{cache} if ($virtiofs->{cache}); + push @$cmd, '--writeback' if ($virtiofs->{'writeback'}); + push @$cmd, '--syslog'; + exec(@$cmd); + } elsif (!defined($pid2)) { + die $could_not_fork_err; + } else { + POSIX::_exit(0); + } + } elsif (!defined($pid)) { + die $could_not_fork_err; + } else { + waitpid($pid, 0); + } + + # return socket to keep it alive, + # so that QEMU will wait for virtiofsd to start + return $socket; +} + +sub close_sockets { + my @sockets = @_; + for my $socket (@sockets) { + shutdown($socket, 2); + close($socket); + } +} +1; -- 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel