From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <f.schauer@proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by lists.proxmox.com (Postfix) with ESMTPS id 8D6D99A1AF
 for <pve-devel@lists.proxmox.com>; Fri, 17 Nov 2023 11:30:01 +0100 (CET)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
 by firstgate.proxmox.com (Proxmox) with ESMTP id 6ED9C1FB62
 for <pve-devel@lists.proxmox.com>; Fri, 17 Nov 2023 11:29:31 +0100 (CET)
Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com
 [94.136.29.106])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (2048 bits))
 (No client certificate requested)
 by firstgate.proxmox.com (Proxmox) with ESMTPS
 for <pve-devel@lists.proxmox.com>; Fri, 17 Nov 2023 11:29:30 +0100 (CET)
Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1])
 by proxmox-new.maurer-it.com (Proxmox) with ESMTP id 7967343D5A
 for <pve-devel@lists.proxmox.com>; Fri, 17 Nov 2023 11:29:30 +0100 (CET)
Message-ID: <a13b66a5-3538-44a0-a34f-a19a11151043@proxmox.com>
Date: Fri, 17 Nov 2023 11:29:29 +0100
MIME-Version: 1.0
User-Agent: Mozilla Thunderbird
Content-Language: en-US
To: Wolfgang Bumiller <w.bumiller@proxmox.com>
Cc: pve-devel@lists.proxmox.com
References: <20231116115044.74757-1-f.schauer@proxmox.com>
 <4w5h65ol46vvgkcojpbyzrd2urun5l72elucfy6qpwwbegf55y@mpgszwz47j4a>
From: Filip Schauer <f.schauer@proxmox.com>
In-Reply-To: <4w5h65ol46vvgkcojpbyzrd2urun5l72elucfy6qpwwbegf55y@mpgszwz47j4a>
Content-Type: text/plain; charset=UTF-8; format=flowed
Content-Transfer-Encoding: 7bit
X-SPAM-LEVEL: Spam detection results:  0
 AWL -0.219 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 T_SCC_BODY_TEXT_LINE    -0.01 -
Subject: Re: [pve-devel] [PATCH v5 container] Add device passthrough
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
X-List-Received-Date: Fri, 17 Nov 2023 10:30:01 -0000

Patch v6 available

https://lists.proxmox.com/pipermail/pve-devel/2023-November/060367.html

On 16/11/2023 14:35, Wolfgang Bumiller wrote:
> On Thu, Nov 16, 2023 at 12:50:44PM +0100, Filip Schauer wrote:
>> Add a dev[n] argument to the container config to pass devices through to
>> a container. A device can be passed by its path. Additionally the access
>> mode, uid and gid can be specified through their respective properties.
>>
>> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
>> ---
>> Changes since v4:
>> * Rename device lists to "mounts" and "devices" respectively
>>    and move them into the tmpfs mounted to the passthrough directory
>> * Add detailed $! error messages
>> * Enforce stricter config formatting on passthrough devices
>> * Combine regex in verify_lxc_dev_string and describe what it does in
>>    a comment
>> * Remove unnecessary int() in map_ct_id_to_host since Perl automatically
>>    parses a string as a number when compared to a number
>> * Cosmetic changes (foreach --> for, unless --> if)
>>
>>   src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
>>   src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
>>   src/PVE/LXC/Tools.pm      | 23 ++++++++---
>>   src/lxc-pve-autodev-hook  | 20 ++++++++--
>>   src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
>>   5 files changed, 229 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
>> index 8f53b53..98eb909 100644
>> --- a/src/PVE/LXC.pm
>> +++ b/src/PVE/LXC.pm
>> @@ -5,7 +5,7 @@ use warnings;
>>   
>>   use Cwd qw();
>>   use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
>> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
>> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>>   use File::Path;
>>   use File::Spec;
>>   use IO::Poll qw(POLLIN POLLHUP);
>> @@ -639,6 +639,27 @@ sub update_lxc_config {
>>   	$raw .= "lxc.mount.auto = sys:mixed\n";
>>       }
>>   
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
>> +	my ($key, $device) = @_;
>> +
>> +	die "Path is not defined for passthrough device $key"
>> +	    unless (defined($device->{path}));
>> +
>> +	my $absolute_path = $device->{path};
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +
>> +	die "Device $absolute_path does not exist\n"
>> +	    if (!defined($mode) || !defined($rdev));
> ^ The above is only true for ENOENT; either check it explicitly or use
> something like "Error accessing device $absolute_path: $!\n".
>
>> +
>> +	die "$absolute_path is not a device\n"
>> +	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
>> +
>> +	my $major = PVE::Tools::dev_t_major($rdev);
>> +	my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
>> +    });
>> +
>>       # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>>       # cannot be exposed to the container with r/w access (cgroup perms).
>>       # When this is enabled mounts will still remain in the monitor's namespace
>> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
>> index 56e1f10..4feae20 100644
>> --- a/src/PVE/LXC/Config.pm
>> +++ b/src/PVE/LXC/Config.pm
>> @@ -1255,6 +1313,20 @@ sub parse_volume {
>>       return;
>>   }
>>   
>> +sub parse_device {
>> +    my ($class, $device_string, $noerr) = @_;
>> +
>> +    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
>> +    if ($@) {
>> +	return undef if $noerr;
>> +	die $@;
>> +    }
>> +
>> +    die "Path has to be defined" if (!$noerr && !defined($res->{path}));
> ^ an error with $noerr should still return `undef` - currently we'd fall
> through to `return $res` on error with $noerr; follow the same pattern
> as above
>
>> +
>> +    return $res;
>> +}
>> +
>>   sub print_volume {
>>       my ($class, $key, $volume) = @_;
>>   
>> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
>> index 3c45949..e860fef 100755
>> --- a/src/lxc-pve-autodev-hook
>> +++ b/src/lxc-pve-autodev-hook
>> @@ -3,18 +3,32 @@
>>   use strict;
>>   use warnings;
>>   
>> -use File::Path;
>> +use Fcntl qw(S_IFREG);
>>   use File::Basename;
>> +use File::Path;
>>   
>>   use PVE::LXC::Tools;
>> -use PVE::Tools;
>> +use PVE::Tools qw(MS_BIND);
>>   
>>   PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>>       my ($vmid, $vars, undef, undef) = @_;
>>   
>>       my $root = $vars->{ROOTFS_MOUNT};
>>   
>> -    PVE::LXC::Tools::for_current_devices($vmid, sub {
>> +    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
>> +	my ($type, $major, $minor, $dev) = @_;
>> +
>> +	my $rel_devpath = "/dev/$dev";
>> +	my $rel_dir = dirname($rel_devpath);
>> +	File::Path::mkpath("$root/$rel_dir");
>> +	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
>> +	    or die("Could not mknod $root/dev/$dev: $!\n");
>> +
>> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
>> +	    or die("Bind mount of device $dev into container failed: $!\n");
>> +    });
>> +
>> +    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
>>   	my ($type, $major, $minor, $dev) = @_;
>>   
>>   	my $rel_devpath = "/dev/$dev";
>> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
>> index 936d0bf..f0cc08d 100755
>> --- a/src/lxc-pve-prestart-hook
>> +++ b/src/lxc-pve-prestart-hook
>> @@ -6,6 +6,7 @@ use strict;
>>   use warnings;
>>   
>>   use Fcntl qw(O_DIRECTORY :mode);
>> +use File::Basename;
>>   use File::Path;
>>   use POSIX;
>>   
>> @@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>>       # Delete any leftover reboot-trigger file
>>       unlink("/var/lib/lxc/$vmid/reboot");
>>   
>> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";
> ^ It's now technically possible for a startup during a package upgrade
> to have issues if we move this since in *theory* the prestart hook could
> run with the old package code and the autodev hook with the new package.
>
> Since the pre-start hook unlinks the file, we *could* potentially
> fall back to that path in the autodev hook to catch this case...
> But then we should keep `unlink`ing on the old path for a while to make
> sure we don't fall back to an old leftover file.
>
>> -    unlink $devlist_file;
>>       my $devices = [];
>>   
>> -    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>> +    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>>   
>>       # Unmount first when the user mounted the container with "pct mount".
>>       eval {