From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 196C791E79 for ; Thu, 5 Oct 2023 10:56:58 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 05236156D0 for ; Thu, 5 Oct 2023 10:56:58 +0200 (CEST) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Thu, 5 Oct 2023 10:56:54 +0200 (CEST) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id D294844852 for ; Thu, 5 Oct 2023 10:56:53 +0200 (CEST) Date: Thu, 05 Oct 2023 10:56:45 +0200 From: Fabian =?iso-8859-1?q?Gr=FCnbichler?= To: Proxmox VE development discussion References: <20230809083739.100024-1-m.frank@proxmox.com> <20230809083739.100024-5-m.frank@proxmox.com> In-Reply-To: <20230809083739.100024-5-m.frank@proxmox.com> MIME-Version: 1.0 User-Agent: astroid/0.16.0 (https://github.com/astroidmail/astroid) Message-Id: <1696495156.74igartj2g.astroid@yuna.none> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-SPAM-LEVEL: Spam detection results: 0 AWL 0.062 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] 
[PATCH qemu-server v7 4/11] feature #1027: virtio-fs support X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 05 Oct 2023 08:56:58 -0000 On August 9, 2023 10:37 am, Markus Frank wrote: > add support for sharing directories with a guest vm >=20 > virtio-fs needs virtiofsd to be started. >=20 > In order to start virtiofsd as a process (despite being a daemon it does not run > in the background), a double-fork is used. >=20 > virtiofsd should close itself together with qemu. >=20 > There are the parameters dirid > and the optional parameters direct-io & cache. > Additionally the xattr & acl parameter overwrite the > directory mapping settings for xattr & acl. >=20 > The dirid gets mapped to the path on the current node > and is also used as a mount-tag (name used to mount the > device on the guest). >=20 > example config: > ``` > virtiofs0: foo,direct-io=3D1,cache=3Dalways,acl=3D1 > virtiofs1: dirid=3Dbar,cache=3Dnever,xattr=3D1 > ``` >=20 > For information on the optional parameters see there: > https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md >=20 > Signed-off-by: Markus Frank > --- > I did not get virtiofsd to run with run_command without creating zombie > processes after shutdown. > So I replaced run_command with exec for now.=20 > Maybe someone can find out why this happens. 
>=20 > PVE/QemuServer.pm | 174 ++++++++++++++++++++++++++++++++++++++- > PVE/QemuServer/Memory.pm | 25 ++++-- > debian/control | 1 + > 3 files changed, 193 insertions(+), 7 deletions(-) >=20 > diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm > index 484bc7f..d547dd6 100644 > --- a/PVE/QemuServer.pm > +++ b/PVE/QemuServer.pm > @@ -43,6 +43,7 @@ use PVE::PBSClient; > use PVE::RESTEnvironment qw(log_warn); > use PVE::RPCEnvironment; > use PVE::Storage; > +use PVE::Mapping::Dir; > use PVE::SysFSTools; > use PVE::Systemd; > use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_= glob_foreach get_host_arch $IPV6RE); > @@ -276,6 +277,42 @@ my $rng_fmt =3D { > }, > }; > =20 > +my $virtiofs_fmt =3D { > + 'dirid' =3D> { > + type =3D> 'string', > + default_key =3D> 1, > + description =3D> "Mapping identifier of the directory mapping to be" > + ." shared with the guest. Also used as a mount tag inside the VM.", > + format_description =3D> 'mapping-id', > + format =3D> 'pve-configid', > + }, > + 'cache' =3D> { > + type =3D> 'string', > + description =3D> "The caching policy the file system should use" > + ." 
(auto, always, never).", > + format_description =3D> "virtiofs-cache", > + enum =3D> [qw(auto always never)], > + optional =3D> 1, > + }, > + 'direct-io' =3D> { > + type =3D> 'boolean', > + description =3D> "Honor the O_DIRECT flag passed down by guest applicat= ions", > + format_description =3D> "virtiofs-directio", > + optional =3D> 1, > + }, > + xattr =3D> { > + type =3D> 'boolean', > + description =3D> "Enable support for extended attributes.", > + optional =3D> 1, > + }, > + acl =3D> { > + type =3D> 'boolean', > + description =3D> "Enable support for posix ACLs (implies --xattr).", > + optional =3D> 1, > + }, > +}; > +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt); > + > my $meta_info_fmt =3D { > 'ctime' =3D> { > type =3D> 'integer', > @@ -840,6 +877,7 @@ while (my ($k, $v) =3D each %$confdesc) { > } > =20 > my $MAX_NETS =3D 32; > +my $MAX_VIRTIOFS =3D 10; > my $MAX_SERIAL_PORTS =3D 4; > my $MAX_PARALLEL_PORTS =3D 3; > my $MAX_NUMA =3D 8; > @@ -984,6 +1022,21 @@ my $netdesc =3D { > =20 > PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc); > =20 > +my $virtiofsdesc =3D { > + optional =3D> 1, > + type =3D> 'string', format =3D> $virtiofs_fmt, > + description =3D> "share files between host and guest", > +}; > +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsde= sc); > + > +sub max_virtiofs { > + return $MAX_VIRTIOFS; > +} > + > +for (my $i =3D 0; $i < $MAX_VIRTIOFS; $i++) { > + $confdesc->{"virtiofs$i"} =3D $virtiofsdesc; > +} > + > my $ipconfig_fmt =3D { > ip =3D> { > type =3D> 'string', > @@ -4113,6 +4166,21 @@ sub config_to_command { > push @$devices, '-device', $netdevicefull; > } > =20 > + my $virtiofs_enabled =3D 0; > + for (my $i =3D 0; $i < $MAX_VIRTIOFS; $i++) { > + my $opt =3D "virtiofs$i"; > + > + next if !$conf->{$opt}; > + my $virtiofs =3D parse_property_string('pve-qm-virtiofs', $conf->{$opt}= ); > + next if !$virtiofs; > + > + push @$devices, '-chardev', 
"socket,id=3Dvirtfs$i,path=3D/var/run/virti= ofsd/vm$vmid-fs$i"; > + push @$devices, '-device', 'vhost-user-fs-pci,queue-size=3D1024' > + .",chardev=3Dvirtfs$i,tag=3D$virtiofs->{dirid}"; > + > + $virtiofs_enabled =3D 1; > + } > + > if ($conf->{ivshmem}) { > my $ivshmem =3D parse_property_string($ivshmem_fmt, $conf->{ivshmem}); > =20 > @@ -4172,6 +4240,14 @@ sub config_to_command { > } > push @$machineFlags, "type=3D${machine_type_min}"; > =20 > + if ($virtiofs_enabled && !$conf->{numa}) { > + # kvm: '-machine memory-backend' and '-numa memdev' properties are > + # mutually exclusive > + push @$devices, '-object', 'memory-backend-file,id=3Dvirtiofs-mem' > + .",size=3D$conf->{memory}M,mem-path=3D/dev/shm,share=3Don"; as discussed off-list, this might be switched to memfd to avoid /dev/shm (same further below) > + push @$machineFlags, 'memory-backend=3Dvirtiofs-mem'; > + } > + > push @$cmd, @$devices; > push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags); > push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machin= eFlags); > @@ -4198,6 +4274,85 @@ sub config_to_command { > return wantarray ? 
($cmd, $vollist, $spice_port, $pci_devices) : $cm= d; > } > =20 > +sub start_virtiofs { > + my ($vmid, $fsid, $virtiofs) =3D @_; > + > + my $dir_cfg =3D PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dir= id}}; > + my $node_list =3D PVE::Mapping::Dir::find_on_current_node($virtiofs-= >{dirid}); > + > + if (!$node_list || scalar($node_list->@*) !=3D 1) { > + die "virtiofs needs exactly one mapping for this node\n"; > + } > + > + eval { > + PVE::Mapping::Dir::assert_valid($node_list->[0]); > + }; > + if (my $err =3D $@) { > + die "Directory Mapping invalid: $err\n"; > + } > + > + my $node_cfg =3D $node_list->[0]; > + my $path =3D $node_cfg->{path}; > + my $socket_path_root =3D "/var/run/virtiofsd"; > + mkdir $socket_path_root; > + my $socket_path =3D "$socket_path_root/vm$vmid-fs$fsid"; > + unlink($socket_path); > + my $socket =3D IO::Socket::UNIX->new( > + Type =3D> SOCK_STREAM, > + Local =3D> $socket_path, > + Listen =3D> 1, > + ) or die "cannot create socket - $!\n"; > + > + my $flags =3D fcntl($socket, F_GETFD, 0) > + or die "failed to get file descriptor flags: $!\n"; > + fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC) > + or die "failed to remove FD_CLOEXEC from file descriptor\n"; > + > + my $fd =3D $socket->fileno(); > + > + my $virtiofsd_bin =3D '/usr/libexec/virtiofsd'; > + > + my $pid =3D fork(); > + if ($pid =3D=3D 0) { > + setsid(); > + $0 =3D "task pve-vm$vmid-virtiofs$fsid"; > + for my $fd_loop (3 .. 
POSIX::sysconf( &POSIX::_SC_OPEN_MAX )) { > + POSIX::close($fd_loop) if ($fd_loop !=3D $fd); > + } > + > + my $pid2 =3D fork(); > + if ($pid2 =3D=3D 0) { > + my $cmd =3D [$virtiofsd_bin, "--fd=3D$fd", "--shared-dir=3D$path"]; > + push @$cmd, '--xattr' if ($virtiofs->{xattr}); > + push @$cmd, '--posix-acl' if ($virtiofs->{acl}); > + > + # Default to dir config xattr & acl settings > + push @$cmd, '--xattr' > + if !defined $virtiofs->{'xattr'} && $dir_cfg->{'xattr'}; > + push @$cmd, '--posix-acl' > + if !defined $virtiofs->{'acl'} && $dir_cfg->{'acl'}; nit: this could be a lot simpler: my $xattr =3D $virtiofs->{xattr} // $dir_cfg->{xattr}; push @$cmd, '--xattr' if $xattr; or even as a one-liner ;) same for ACL > + > + push @$cmd, '--announce-submounts' if ($node_cfg->{submounts}); > + push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'}); > + push @$cmd, "--cache=3D$virtiofs->{'cache'}" if ($virtiofs->{'cache= '}); > + > + exec(@$cmd); > + } elsif (!defined($pid2)) { > + die "could not fork to start virtiofsd\n"; > + } else { > + POSIX::_exit(0); > + } > + } elsif (!defined($pid)) { > + die "could not fork to start virtiofsd\n"; > + } else { > + waitpid($pid, 0); > + } > + > + # return socket to keep it alive, > + # so that qemu will wait for virtiofsd to start > + return $socket; > +}