From: Markus Frank <m.frank@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH qemu-server v9 5/7] fix #1027: virtio-fs support
Date: Fri, 1 Mar 2024 09:05:24 +0100 [thread overview]
Message-ID: <20240301080526.57798-6-m.frank@proxmox.com> (raw)
In-Reply-To: <20240301080526.57798-1-m.frank@proxmox.com>
add support for sharing directories with a guest vm
virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon it is does not
run in the background), a double-fork is used.
virtiofsd should close itself together with qemu.
There are the parameters dirid and the optional parameters direct-io, cache and
writeback. Additionally the xattr & acl parameter overwrite the directory
mapping settings for xattr & acl.
The dirid gets mapped to the path on the current node and is also used as a
mount tag (name used to mount the device on the guest).
example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
```
For information on the optional parameters see the coherent doc patch
and the official gitlab README:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
Also add a permission check for virtiofs directory access.
Signed-off-by: Markus Frank <m.frank@proxmox.com>
---
PVE/API2/Qemu.pm | 39 ++++++-
PVE/QemuServer.pm | 19 +++-
PVE/QemuServer/Makefile | 3 +-
PVE/QemuServer/Memory.pm | 34 ++++--
PVE/QemuServer/Virtiofs.pm | 212 +++++++++++++++++++++++++++++++++++++
5 files changed, 296 insertions(+), 11 deletions(-)
create mode 100644 PVE/QemuServer/Virtiofs.pm
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index cdc8f7a..20edbfb 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -648,6 +648,32 @@ my sub check_vm_create_hostpci_perm {
return 1;
};
+my sub check_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
+
+ return 1 if $authuser eq 'root@pam';
+
+ $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
+
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
+ $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+
+ return 1;
+};
+
+my sub check_vm_create_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
+
+ return 1 if $authuser eq 'root@pam';
+
+ for my $opt (keys %{$param}) {
+ next if $opt !~ m/^virtiofs\d+$/;
+ check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
+ }
+
+ return 1;
+};
+
my $check_vm_modify_config_perm = sub {
my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
@@ -658,7 +684,7 @@ my $check_vm_modify_config_perm = sub {
# else, as there the permission can be value dependend
next if PVE::QemuServer::is_valid_drivename($opt);
next if $opt eq 'cdrom';
- next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
+ next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/;
next if $opt eq 'tags';
@@ -954,6 +980,7 @@ __PACKAGE__->register_method({
&$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param);
+ check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param);
PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
&$check_cpu_model_access($rpcenv, $authuser, $param);
@@ -1828,6 +1855,10 @@ my $update_vm_api = sub {
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
PVE::QemuConfig->write_config($vmid, $conf);
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
+ PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
+ PVE::QemuConfig->write_config($vmid, $conf);
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
delete $conf->{$opt};
@@ -1914,6 +1945,12 @@ my $update_vm_api = sub {
}
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
$conf->{pending}->{$opt} = $param->{$opt};
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ if (my $oldvalue = $conf->{$opt}) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue);
+ }
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
+ $conf->{pending}->{$opt} = $param->{$opt};
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
$conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index b45507a..06182bf 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -34,6 +34,7 @@ use PVE::DataCenterConfig;
use PVE::Exception qw(raise raise_param_exc);
use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
+use PVE::Mapping::Dir;
use PVE::Mapping::PCI;
use PVE::Mapping::USB;
use PVE::INotify;
@@ -60,6 +61,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
use PVE::QemuServer::USB;
+use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
my $have_sdn;
eval {
@@ -959,6 +961,10 @@ my $netdesc = {
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
+for (my $i = 0; $i < max_virtiofs(); $i++) {
+ $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs');
+}
+
my $ipconfig_fmt = {
ip => {
type => 'string',
@@ -3808,7 +3814,7 @@ sub config_to_command {
}
PVE::QemuServer::Memory::config(
- $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
+ $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd, $machineFlags);
push @$cmd, '-S' if $conf->{freeze};
@@ -4084,6 +4090,8 @@ sub config_to_command {
}
push @$machineFlags, "type=${machine_type_min}";
+ PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices, $machineFlags);
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
@@ -5865,6 +5873,8 @@ sub vm_start_nolock {
PVE::Tools::run_fork sub {
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+ my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
+
my $tpmpid;
if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
# start the TPM emulator so QEMU can connect on start
@@ -5877,8 +5887,10 @@ sub vm_start_nolock {
warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
kill 'TERM', $tpmpid;
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
die "QEMU exited with code $exitcode\n";
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
};
};
@@ -6504,7 +6516,10 @@ sub check_mapping_access {
} else {
die "either 'host' or 'mapping' must be set.\n";
}
- }
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+ }
}
};
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index ac26e56..d1bf8bb 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -11,7 +11,8 @@ SOURCES=PCI.pm \
CPUConfig.pm \
CGroup.pm \
Drive.pm \
- QMPHelpers.pm
+ QMPHelpers.pm \
+ Virtiofs.pm
.PHONY: install
install: ${SOURCES}
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index f365f2d..691f9b3 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -10,6 +10,7 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer::Helpers qw(parse_number_sets);
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::QMPHelpers qw(qemu_devicedel qemu_objectdel);
+use PVE::QemuServer::Virtiofs;
use base qw(Exporter);
@@ -336,7 +337,7 @@ sub qemu_memdevices_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags) = @_;
my $memory = get_current_memory($conf->{memory});
my $static_memory = 0;
@@ -367,6 +368,16 @@ sub config {
die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
+ my $virtiofs_enabled = 0;
+ for (my $i = 0; $i < PVE::QemuServer::Virtiofs::max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+ next if !$conf->{$opt};
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ if ($virtiofs) {
+ $virtiofs_enabled = 1;
+ }
+ }
+
if ($conf->{numa}) {
my $numa_totalmemory = undef;
@@ -379,7 +390,8 @@ sub config {
my $numa_memory = $numa->{memory};
$numa_totalmemory += $numa_memory;
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
# cpus
my $cpulists = $numa->{cpus};
@@ -404,7 +416,7 @@ sub config {
}
push @$cmd, '-object', $mem_object;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -418,15 +430,21 @@ sub config {
die "host NUMA node$i doesn't exist\n"
if !host_numanode_exists($i) && $conf->{hugepages};
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
- push @$cmd, '-object', $mem_object;
-
my $cpus = ($cores * $i);
$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+ push @$cmd, '-object', $mem_object;
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
}
+ } elsif ($virtiofs_enabled) {
+ # kvm: '-machine memory-backend' and '-numa memdev' properties are
+ # mutually exclusive
+ push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
+ .",size=$conf->{memory}M,share=on";
+ push @$machine_flags, 'memory-backend=virtiofs-mem';
}
if ($hotplug) {
@@ -453,6 +471,8 @@ sub print_mem_object {
my $path = hugepages_mount_path($hugepages_size);
return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+ } elsif ($id =~ m/^virtiofs-mem/) {
+ return "memory-backend-memfd,id=$id,size=${size}M,share=on";
} else {
return "memory-backend-ram,id=$id,size=${size}M";
}
diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
new file mode 100644
index 0000000..4c59348
--- /dev/null
+++ b/PVE/QemuServer/Virtiofs.pm
@@ -0,0 +1,212 @@
+package PVE::QemuServer::Virtiofs;
+
+use strict;
+use warnings;
+
+use Fcntl;
+use IO::Socket::UNIX;
+use POSIX;
+use PVE::JSONSchema qw(get_standard_option parse_property_string);
+use PVE::Mapping::Dir;
+
+use base qw(Exporter);
+
+our @EXPORT_OK = qw(
+max_virtiofs
+assert_virtiofs_config
+start_virtiofsd
+start_all_virtiofsd
+virtiofs_qemu_param
+close_sockets
+);
+
+my $MAX_VIRTIOFS = 10;
+my $socket_path_root = "/run/qemu-server/virtiofsd";
+
+my $virtiofs_fmt = {
+ 'dirid' => {
+ type => 'string',
+ default_key => 1,
+ description => "Mapping identifier of the directory mapping to be"
+ ." shared with the guest. Also used as a mount tag inside the VM.",
+ format_description => 'mapping-id',
+ format => 'pve-configid',
+ },
+ 'cache' => {
+ type => 'string',
+ description => "The caching policy the file system should use (auto, always, never).",
+ enum => [qw(auto always never)],
+ default => "auto",
+ optional => 1,
+ },
+ 'direct-io' => {
+ type => 'boolean',
+ description => "Honor the O_DIRECT flag passed down by guest applications.",
+ default => 0,
+ optional => 1,
+ },
+ writeback => {
+ type => 'boolean',
+ description => "Enable writeback cache. If enabled, writes may be cached"
+ ." in the guest until the file is closed or an fsync is performed.",
+ default => 0,
+ optional => 1,
+ },
+ xattr => {
+ type => 'boolean',
+ description => "Overwrite the xattr option from mapping and explicitly"
+ ." enable/disable support for extended attributes for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+ acl => {
+ type => 'boolean',
+ description => "Overwrite the acl option from mapping and explicitly"
+ ." enable/disable support for posix ACLs (enabled acl implies xattr)"
+ ." for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
+my $virtiofsdesc = {
+ optional => 1,
+ type => 'string', format => $virtiofs_fmt,
+ description => "share a directory between host and guest",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+sub max_virtiofs {
+ return $MAX_VIRTIOFS;
+}
+
+sub assert_virtiofs_config {
+ my ($conf, $virtiofs) = @_;
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+ if ($acl && PVE::QemuServer::windows_version($conf->{ostype})) {
+ log_warn(
+ "Please disable ACLs for virtiofs on Windows VMs, otherwise"
+ ." the virtiofs shared directory cannot be mounted."
+ );
+ }
+
+ if (!$node_list || scalar($node_list->@*) != 1) {
+ die "virtiofs needs exactly one mapping for this node\n";
+ }
+
+ eval PVE::Mapping::Dir::assert_valid($node_list->[0]);
+ if (my $err = $@) {
+ die "Directory Mapping invalid: $err\n";
+ }
+}
+
+sub config {
+ my ($conf, $vmid, $devices, $machine_flags) = @_;
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ assert_virtiofs_config($conf, $virtiofs);
+
+ push @$devices, '-chardev', "socket,id=virtfs$i,path=$socket_path_root/vm$vmid-fs$i";
+
+ # queue-size is set 1024 because of bug with Windows guests:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1873088
+ # 1024 is also always used in the virtiofs documentations:
+ # https://gitlab.com/virtio-fs/virtiofsd#examples
+ push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+ .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
+ }
+}
+
+sub start_all_virtiofsd {
+ my ($conf, $vmid) = @_;
+ my $virtiofs_sockets = [];
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
+ push @$virtiofs_sockets, $virtiofs_socket;
+ }
+ return $virtiofs_sockets;
+}
+
+sub close_sockets {
+ my @sockets = @_;
+ for my $socket (@sockets) {
+ shutdown($socket, 2);
+ close($socket);
+ }
+}
+
+sub start_virtiofsd {
+ my ($vmid, $fsid, $virtiofs) = @_;
+
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ # Default to dir config xattr & acl settings
+ my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
+ my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+
+ my $node_cfg = $node_list->[0];
+ my $path = $node_cfg->{path};
+ mkdir $socket_path_root;
+ my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+ unlink($socket_path);
+ my $socket = IO::Socket::UNIX->new(
+ Type => SOCK_STREAM,
+ Local => $socket_path,
+ Listen => 1,
+ ) or die "cannot create socket - $!\n";
+
+ my $flags = fcntl($socket, F_GETFD, 0)
+ or die "failed to get file descriptor flags: $!\n";
+ fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+ or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+ my $fd = $socket->fileno();
+
+ my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+
+ my $pid = fork();
+ if ($pid == 0) {
+ setsid();
+ $0 = "task pve-vm$vmid-virtiofs$fsid";
+ my $pid2 = fork();
+ if ($pid2 == 0) {
+ my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+ push @$cmd, '--xattr' if $xattr;
+ push @$cmd, '--posix-acl' if $acl;
+ push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+ push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+ push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
+ push @$cmd, "--writeback" if ($virtiofs->{'writeback'});
+ push @$cmd, '--syslog';
+ exec(@$cmd);
+ } elsif (!defined($pid2)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ POSIX::_exit(0);
+ }
+ } elsif (!defined($pid)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ waitpid($pid, 0);
+ }
+
+ # return socket to keep it alive,
+ # so that QEMU will wait for virtiofsd to start
+ return $socket;
+}
--
2.39.2
next prev parent reply other threads:[~2024-03-01 8:06 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-01 8:05 [pve-devel] [PATCH cluster/guest-common/docs/qemu-server/manager v9 0/7] virtiofs Markus Frank
2024-03-01 8:05 ` [pve-devel] [PATCH cluster v9 1/7] add mapping/dir.cfg for resource mapping Markus Frank
2024-03-01 8:05 ` [pve-devel] [PATCH guest-common v9 2/7] add dir mapping section config Markus Frank
2024-03-01 8:05 ` [pve-devel] [PATCH docs v9 3/7] add doc section for the shared filesystem virtio-fs Markus Frank
2024-03-01 8:05 ` [pve-devel] [PATCH qemu-server v9 4/7] add virtiofsd as runtime dependency for qemu-server Markus Frank
2024-03-01 8:05 ` Markus Frank [this message]
2024-03-01 8:05 ` [pve-devel] [PATCH qemu-server v9 6/7] migration: check_local_resources for virtiofs Markus Frank
2024-03-01 8:05 ` [pve-devel] [PATCH manager v9 7/7] api: add resource map api endpoints for directories Markus Frank
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240301080526.57798-6-m.frank@proxmox.com \
--to=m.frank@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal