all lists on lists.proxmox.com
 help / color / mirror / Atom feed
* [pve-devel] [PATCH v5 container] Add device passthrough
@ 2023-11-16 11:50 Filip Schauer
  2023-11-16 13:35 ` Wolfgang Bumiller
  0 siblings, 1 reply; 3+ messages in thread
From: Filip Schauer @ 2023-11-16 11:50 UTC (permalink / raw)
  To: pve-devel

Add a dev[n] argument to the container config to pass devices through to
a container. A device can be passed by its path. Additionally the access
mode, uid and gid can be specified through their respective properties.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
Changes since v4:
* Rename device lists to "mounts" and "devices" respectively
  and move them into the tmpfs mounted to the passthrough directory
* Add detailed $! error messages
* Enforce stricter config formatting on passthrough devices
* Combine regex in verify_lxc_dev_string and describe what it does in
  a comment
* Remove unnecessary int() in map_ct_id_to_host since Perl automatically
  parses a string as a number when compared to a number
* Cosmetic changes (foreach --> for, unless --> if)

 src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
 src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Tools.pm      | 23 ++++++++---
 src/lxc-pve-autodev-hook  | 20 ++++++++--
 src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
 5 files changed, 229 insertions(+), 13 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 8f53b53..98eb909 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -5,7 +5,7 @@ use warnings;
 
 use Cwd qw();
 use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
 use File::Path;
 use File::Spec;
 use IO::Poll qw(POLLIN POLLHUP);
@@ -639,6 +639,27 @@ sub update_lxc_config {
 	$raw .= "lxc.mount.auto = sys:mixed\n";
     }
 
+    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
+	my ($key, $device) = @_;
+
+	die "Path is not defined for passthrough device $key"
+	    unless (defined($device->{path}));
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Device $absolute_path does not exist\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	die "$absolute_path is not a device\n"
+	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
+
+	my $major = PVE::Tools::dev_t_major($rdev);
+	my $minor = PVE::Tools::dev_t_minor($rdev);
+	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
+    });
+
     # WARNING: DO NOT REMOVE this without making sure that loop device nodes
     # cannot be exposed to the container with r/w access (cgroup perms).
     # When this is enabled mounts will still remain in the monitor's namespace
@@ -1344,6 +1365,8 @@ sub check_ct_modify_config_perm {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	    check_bridge_access($rpcenv, $authuser, $oldconf->{$opt}) if $oldconf->{$opt};
 	    check_bridge_access($rpcenv, $authuser, $newconf->{$opt}) if $newconf->{$opt};
+	} elsif ($opt =~ m/^dev\d+$/) {
+	    raise_perm_exc("configuring device passthrough is only allowed for root\@pam");
 	} elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' || $opt eq 'hostname') {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	} elsif ($opt eq 'features') {
@@ -2393,6 +2416,34 @@ sub validate_id_maps {
     }
 }
 
+sub map_ct_id_to_host {
+    my ($id, $id_map, $id_type) = @_;
+
+    for my $mapping (@$id_map) {
+	my ($type, $ct, $host, $length) = @$mapping;
+
+	next if ($type ne $id_type);
+
+	if ($id >= $ct && $id < ($ct + $length)) {
+	    return $host - $ct + $id;
+	}
+    }
+
+    return $id;
+}
+
+sub map_ct_uid_to_host {
+    my ($uid, $id_map) = @_;
+
+    return map_ct_id_to_host($uid, $id_map, 'u');
+}
+
+sub map_ct_gid_to_host {
+    my ($gid, $id_map) = @_;
+
+    return map_ct_id_to_host($gid, $id_map, 'g');
+}
+
 sub userns_command {
     my ($id_map) = @_;
     if (@$id_map) {
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 56e1f10..4feae20 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -29,6 +29,7 @@ mkdir $lockdir;
 mkdir "/etc/pve/nodes/$nodename/lxc";
 my $MAX_MOUNT_POINTS = 256;
 my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
+my $MAX_DEVICES = 256;
 
 # BEGIN implemented abstract methods from PVE::AbstractConfig
 
@@ -908,6 +909,63 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
     }
 }
 
+PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string);
+sub verify_lxc_dev_string {
+    my ($dev, $noerr) = @_;
+
+    # do not allow /./ or /../ or /.$ or /..$
+    # enforce /dev/ at the beginning
+
+    if (
+	$dev =~ m@/\.\.?(?:/|$)@ ||
+	$dev !~ m!^/dev/!
+    ) {
+	return undef if $noerr;
+	die "$dev is not a valid device path\n";
+    }
+
+    return $dev;
+}
+
+my $dev_desc = {
+    path => {
+	optional => 1,
+	type => 'string',
+	default_key => 1,
+	format => 'pve-lxc-dev-string',
+	format_description => 'Path',
+	description => 'Device to pass through to the container',
+	verbose_description => 'Path to the device to pass through to the container',
+    },
+    mode => {
+	optional => 1,
+	type => 'string',
+	pattern => '0[0-7]{3}',
+	format_description => 'Octal access mode',
+	description => 'Access mode to be set on the device node',
+    },
+    uid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'User ID to be assigned to the device node',
+    },
+    gid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'Group ID to be assigned to the device node',
+    },
+};
+
+for (my $i = 0; $i < $MAX_DEVICES; $i++) {
+    $confdesc->{"dev$i"} = {
+	optional => 1,
+	type => 'string', format => $dev_desc,
+	description => "Device to pass through to the container",
+    }
+}
+
 sub parse_pct_config {
     my ($filename, $raw, $strict) = @_;
 
@@ -1255,6 +1313,20 @@ sub parse_volume {
     return;
 }
 
+sub parse_device {
+    my ($class, $device_string, $noerr) = @_;
+
+    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
+    if ($@) {
+	return undef if $noerr;
+	die $@;
+    }
+
+    die "Path has to be defined" if (!$noerr && !defined($res->{path}));
+
+    return $res;
+}
+
 sub print_volume {
     my ($class, $key, $volume) = @_;
 
@@ -1762,4 +1834,16 @@ sub get_derived_property {
     }
 }
 
+sub foreach_passthrough_device {
+    my ($class, $conf, $func, @param) = @_;
+
+    for my $key (keys %$conf) {
+	next if $key !~ m/^dev(\d+)$/;
+
+	my $device = $class->parse_device($conf->{$key});
+
+	$func->($key, $device, @param);
+    }
+}
+
 1;
diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm
index 62cdbc1..e9262f2 100644
--- a/src/PVE/LXC/Tools.pm
+++ b/src/PVE/LXC/Tools.pm
@@ -81,10 +81,9 @@ sub lxc_hook($$&) {
     $code->($ct_name, $common_vars, $namespaces, $args);
 }
 
-sub for_current_devices($&) {
-    my ($vmid, $code) = @_;
+sub for_devices {
+    my ($devlist_file, $vmid, $code) = @_;
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
     my $fd;
 
     if (! open $fd, '<', $devlist_file) {
@@ -93,8 +92,8 @@ sub for_current_devices($&) {
     }
 
     while (defined(my $line = <$fd>)) {
-	if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) {
-	    warn "invalid .pve-devices entry: $line\n";
+	if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) {
+	    warn "invalid $devlist_file entry: $line\n";
 	    next;
 	}
 
@@ -117,6 +116,20 @@ sub for_current_devices($&) {
     close $fd;
 }
 
+sub for_current_passthrough_mounts($&) {
+    my ($vmid, $code) = @_;
+
+    my $devlist_file = "/var/lib/lxc/$vmid/passthrough/mount";
+    for_devices($devlist_file, $vmid, $code);
+}
+
+sub for_current_passthrough_devices($&) {
+    my ($vmid, $code) = @_;
+
+    my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough/devices";
+    for_devices($passthrough_devlist_file, $vmid, $code);
+}
+
 sub cgroup_do_write($$) {
     my ($path, $value) = @_;
     my $fd;
diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
index 3c45949..e860fef 100755
--- a/src/lxc-pve-autodev-hook
+++ b/src/lxc-pve-autodev-hook
@@ -3,18 +3,32 @@
 use strict;
 use warnings;
 
-use File::Path;
+use Fcntl qw(S_IFREG);
 use File::Basename;
+use File::Path;
 
 use PVE::LXC::Tools;
-use PVE::Tools;
+use PVE::Tools qw(MS_BIND);
 
 PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
     my ($vmid, $vars, undef, undef) = @_;
 
     my $root = $vars->{ROOTFS_MOUNT};
 
-    PVE::LXC::Tools::for_current_devices($vmid, sub {
+    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
+	my ($type, $major, $minor, $dev) = @_;
+
+	my $rel_devpath = "/dev/$dev";
+	my $rel_dir = dirname($rel_devpath);
+	File::Path::mkpath("$root/$rel_dir");
+	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
+	    or die("Could not mknod $root/dev/$dev: $!\n");
+
+	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
+	    or die("Bind mount of device $dev into container failed: $!\n");
+    });
+
+    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
 	my ($type, $major, $minor, $dev) = @_;
 
 	my $rel_devpath = "/dev/$dev";
diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index 936d0bf..f0cc08d 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -6,6 +6,7 @@ use strict;
 use warnings;
 
 use Fcntl qw(O_DIRECTORY :mode);
+use File::Basename;
 use File::Path;
 use POSIX;
 
@@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
     # Delete any leftover reboot-trigger file
     unlink("/var/lib/lxc/$vmid/reboot");
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
-    unlink $devlist_file;
     my $devices = [];
 
-    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
 
     # Unmount first when the user mounted the container with "pct mount".
     eval {
@@ -118,6 +117,49 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 
     PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);
 
+    # Device passthrough
+    my $passthrough_devices = [];
+
+    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
+    File::Path::make_path($passthrough_dir);
+    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
+        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");
+
+    my $setup_passthrough_device = sub {
+	my ($key, $device) = @_;
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Could not get mode or device ID of $absolute_path\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	my $passthrough_device_path = $passthrough_dir . $absolute_path;
+	File::Path::make_path(dirname($passthrough_device_path));
+	PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
+	    or die("failed to mknod $passthrough_device_path: $!\n");
+
+	# Use chmod because umask could mess with the access mode on mknod
+	my $passthrough_mode = 0660;
+	$passthrough_mode = oct($device->{mode}) if defined($device->{mode});
+	chmod $passthrough_mode, $passthrough_device_path
+	    or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n";
+
+	# Set uid and gid of the device node
+	my $uid = 0;
+	my $gid = 0;
+	$uid = $device->{uid} if defined($device->{uid});
+	$gid = $device->{gid} if defined($device->{gid});
+	$uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
+	$gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
+	chown $uid, $gid, $passthrough_device_path
+	    or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");
+
+	push @$passthrough_devices, [$absolute_path, $mode, $rdev];
+    };
+
+    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);
+
     my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
     $lxc_setup->pre_start_hook();
 
@@ -138,7 +180,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 	    my $minor = PVE::Tools::dev_t_minor($rdev);
 	    $devlist .= "b:$major:$minor:$dev\n";
 	}
-	PVE::Tools::file_set_contents($devlist_file, $devlist);
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mount", $devlist);
+    }
+
+    if (@$passthrough_devices) {
+	my $devlist = '';
+	for my $dev (@$passthrough_devices) {
+	    my ($path, $mode, $rdev) = @$dev;
+	    my $major = PVE::Tools::dev_t_major($rdev);
+	    my $minor = PVE::Tools::dev_t_minor($rdev);
+	    my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	    $devlist .= "$device_type_char:$major:$minor:$path\n";
+	}
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
     }
 });
 
-- 
2.39.2





^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [pve-devel] [PATCH v5 container] Add device passthrough
  2023-11-16 11:50 [pve-devel] [PATCH v5 container] Add device passthrough Filip Schauer
@ 2023-11-16 13:35 ` Wolfgang Bumiller
  2023-11-17 10:29   ` Filip Schauer
  0 siblings, 1 reply; 3+ messages in thread
From: Wolfgang Bumiller @ 2023-11-16 13:35 UTC (permalink / raw)
  To: Filip Schauer; +Cc: pve-devel

On Thu, Nov 16, 2023 at 12:50:44PM +0100, Filip Schauer wrote:
> Add a dev[n] argument to the container config to pass devices through to
> a container. A device can be passed by its path. Additionally the access
> mode, uid and gid can be specified through their respective properties.
> 
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
> Changes since v4:
> * Rename device lists to "mounts" and "devices" respectively
>   and move them into the tmpfs mounted to the passthrough directory
> * Add detailed $! error messages
> * Enforce stricter config formatting on passthrough devices
> * Combine regex in verify_lxc_dev_string and describe what it does in
>   a comment
> * Remove unnecessary int() in map_ct_id_to_host since Perl automatically
>   parses a string as a number when compared to a number
> * Cosmetic changes (foreach --> for, unless --> if)
> 
>  src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
>  src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
>  src/PVE/LXC/Tools.pm      | 23 ++++++++---
>  src/lxc-pve-autodev-hook  | 20 ++++++++--
>  src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
>  5 files changed, 229 insertions(+), 13 deletions(-)
> 
> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
> index 8f53b53..98eb909 100644
> --- a/src/PVE/LXC.pm
> +++ b/src/PVE/LXC.pm
> @@ -5,7 +5,7 @@ use warnings;
>  
>  use Cwd qw();
>  use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>  use File::Path;
>  use File::Spec;
>  use IO::Poll qw(POLLIN POLLHUP);
> @@ -639,6 +639,27 @@ sub update_lxc_config {
>  	$raw .= "lxc.mount.auto = sys:mixed\n";
>      }
>  
> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
> +	my ($key, $device) = @_;
> +
> +	die "Path is not defined for passthrough device $key"
> +	    unless (defined($device->{path}));
> +
> +	my $absolute_path = $device->{path};
> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
> +
> +	die "Device $absolute_path does not exist\n" 
> +	    if (!defined($mode) || !defined($rdev));

^ The above is only true for ENOENT, either check it explicitly or use
something like "Error accessing device $absolute_path: $!\n".

> +
> +	die "$absolute_path is not a device\n"
> +	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
> +
> +	my $major = PVE::Tools::dev_t_major($rdev);
> +	my $minor = PVE::Tools::dev_t_minor($rdev);
> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
> +    });
> +
>      # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>      # cannot be exposed to the container with r/w access (cgroup perms).
>      # When this is enabled mounts will still remain in the monitor's namespace
> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
> index 56e1f10..4feae20 100644
> --- a/src/PVE/LXC/Config.pm
> +++ b/src/PVE/LXC/Config.pm
> @@ -1255,6 +1313,20 @@ sub parse_volume {
>      return;
>  }
>  
> +sub parse_device {
> +    my ($class, $device_string, $noerr) = @_;
> +
> +    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
> +    if ($@) {
> +	return undef if $noerr;
> +	die $@;
> +    }
> +
> +    die "Path has to be defined" if (!$noerr && !defined($res->{path}));

^ an error with $noerr should still return `undef` - here we'd fall back
to `return $res` on error with $noerr currently, follow the same pattern
as above

> +
> +    return $res;
> +}
> +
>  sub print_volume {
>      my ($class, $key, $volume) = @_;
>  
> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
> index 3c45949..e860fef 100755
> --- a/src/lxc-pve-autodev-hook
> +++ b/src/lxc-pve-autodev-hook
> @@ -3,18 +3,32 @@
>  use strict;
>  use warnings;
>  
> -use File::Path;
> +use Fcntl qw(S_IFREG);
>  use File::Basename;
> +use File::Path;
>  
>  use PVE::LXC::Tools;
> -use PVE::Tools;
> +use PVE::Tools qw(MS_BIND);
>  
>  PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>      my ($vmid, $vars, undef, undef) = @_;
>  
>      my $root = $vars->{ROOTFS_MOUNT};
>  
> -    PVE::LXC::Tools::for_current_devices($vmid, sub {
> +    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
> +	my ($type, $major, $minor, $dev) = @_;
> +
> +	my $rel_devpath = "/dev/$dev";
> +	my $rel_dir = dirname($rel_devpath);
> +	File::Path::mkpath("$root/$rel_dir");
> +	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
> +	    or die("Could not mknod $root/dev/$dev: $!\n");
> +
> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
> +	    or die("Bind mount of device $dev into container failed: $!\n");
> +    });
> +
> +    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
>  	my ($type, $major, $minor, $dev) = @_;
>  
>  	my $rel_devpath = "/dev/$dev";
> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
> index 936d0bf..f0cc08d 100755
> --- a/src/lxc-pve-prestart-hook
> +++ b/src/lxc-pve-prestart-hook
> @@ -6,6 +6,7 @@ use strict;
>  use warnings;
>  
>  use Fcntl qw(O_DIRECTORY :mode);
> +use File::Basename;
>  use File::Path;
>  use POSIX;
>  
> @@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>      # Delete any leftover reboot-trigger file
>      unlink("/var/lib/lxc/$vmid/reboot");
>  
> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";

^ It's now technically possible for a startup during a package upgrade
to have issues if we move this since in *theory* the prestart hook could
run with the old package code and the autodev hook with the new package.

Since the pre-start hook unlinks the file, we *could* potentially
fallback to that path in the autodev hook to catch this case...
But then we should keep `unlink`ing on the old path for a while to make
sure we don't fall back to an old leftover file.

> -    unlink $devlist_file;
>      my $devices = [];
>  
> -    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
> +    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>  
>      # Unmount first when the user mounted the container with "pct mount".
>      eval {




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [pve-devel] [PATCH v5 container] Add device passthrough
  2023-11-16 13:35 ` Wolfgang Bumiller
@ 2023-11-17 10:29   ` Filip Schauer
  0 siblings, 0 replies; 3+ messages in thread
From: Filip Schauer @ 2023-11-17 10:29 UTC (permalink / raw)
  To: Wolfgang Bumiller; +Cc: pve-devel

Patch v6 available

https://lists.proxmox.com/pipermail/pve-devel/2023-November/060367.html

On 16/11/2023 14:35, Wolfgang Bumiller wrote:
> On Thu, Nov 16, 2023 at 12:50:44PM +0100, Filip Schauer wrote:
>> Add a dev[n] argument to the container config to pass devices through to
>> a container. A device can be passed by its path. Additionally the access
>> mode, uid and gid can be specified through their respective properties.
>>
>> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
>> ---
>> Changes since v4:
>> * Rename device lists to "mounts" and "devices" respectively
>>    and move them into the tmpfs mounted to the passthrough directory
>> * Add detailed $! error messages
>> * Enforce stricter config formatting on passthrough devices
>> * Combine regex in verify_lxc_dev_string and describe what it does in
>>    a comment
>> * Remove unnecessary int() in map_ct_id_to_host since Perl automatically
>>    parses a string as a number when compared to a number
>> * Cosmetic changes (foreach --> for, unless --> if)
>>
>>   src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
>>   src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
>>   src/PVE/LXC/Tools.pm      | 23 ++++++++---
>>   src/lxc-pve-autodev-hook  | 20 ++++++++--
>>   src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
>>   5 files changed, 229 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
>> index 8f53b53..98eb909 100644
>> --- a/src/PVE/LXC.pm
>> +++ b/src/PVE/LXC.pm
>> @@ -5,7 +5,7 @@ use warnings;
>>   
>>   use Cwd qw();
>>   use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
>> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
>> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>>   use File::Path;
>>   use File::Spec;
>>   use IO::Poll qw(POLLIN POLLHUP);
>> @@ -639,6 +639,27 @@ sub update_lxc_config {
>>   	$raw .= "lxc.mount.auto = sys:mixed\n";
>>       }
>>   
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
>> +	my ($key, $device) = @_;
>> +
>> +	die "Path is not defined for passthrough device $key"
>> +	    unless (defined($device->{path}));
>> +
>> +	my $absolute_path = $device->{path};
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +
>> +	die "Device $absolute_path does not exist\n"
>> +	    if (!defined($mode) || !defined($rdev));
> ^ The above is only true for ENOENT, either check it explicitly or use
> something like "Error accessing device $absolute_path: $!\n".
>
>> +
>> +	die "$absolute_path is not a device\n"
>> +	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
>> +
>> +	my $major = PVE::Tools::dev_t_major($rdev);
>> +	my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
>> +    });
>> +
>>       # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>>       # cannot be exposed to the container with r/w access (cgroup perms).
>>       # When this is enabled mounts will still remain in the monitor's namespace
>> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
>> index 56e1f10..4feae20 100644
>> --- a/src/PVE/LXC/Config.pm
>> +++ b/src/PVE/LXC/Config.pm
>> @@ -1255,6 +1313,20 @@ sub parse_volume {
>>       return;
>>   }
>>   
>> +sub parse_device {
>> +    my ($class, $device_string, $noerr) = @_;
>> +
>> +    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
>> +    if ($@) {
>> +	return undef if $noerr;
>> +	die $@;
>> +    }
>> +
>> +    die "Path has to be defined" if (!$noerr && !defined($res->{path}));
> ^ an error with $noerr should still return `undef` - here we'd fall back
> to `return $res` on error with $noerr currently, follow the same pattern
> as above
>
>> +
>> +    return $res;
>> +}
>> +
>>   sub print_volume {
>>       my ($class, $key, $volume) = @_;
>>   
>> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
>> index 3c45949..e860fef 100755
>> --- a/src/lxc-pve-autodev-hook
>> +++ b/src/lxc-pve-autodev-hook
>> @@ -3,18 +3,32 @@
>>   use strict;
>>   use warnings;
>>   
>> -use File::Path;
>> +use Fcntl qw(S_IFREG);
>>   use File::Basename;
>> +use File::Path;
>>   
>>   use PVE::LXC::Tools;
>> -use PVE::Tools;
>> +use PVE::Tools qw(MS_BIND);
>>   
>>   PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>>       my ($vmid, $vars, undef, undef) = @_;
>>   
>>       my $root = $vars->{ROOTFS_MOUNT};
>>   
>> -    PVE::LXC::Tools::for_current_devices($vmid, sub {
>> +    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
>> +	my ($type, $major, $minor, $dev) = @_;
>> +
>> +	my $rel_devpath = "/dev/$dev";
>> +	my $rel_dir = dirname($rel_devpath);
>> +	File::Path::mkpath("$root/$rel_dir");
>> +	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
>> +	    or die("Could not mknod $root/dev/$dev: $!\n");
>> +
>> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
>> +	    or die("Bind mount of device $dev into container failed: $!\n");
>> +    });
>> +
>> +    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
>>   	my ($type, $major, $minor, $dev) = @_;
>>   
>>   	my $rel_devpath = "/dev/$dev";
>> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
>> index 936d0bf..f0cc08d 100755
>> --- a/src/lxc-pve-prestart-hook
>> +++ b/src/lxc-pve-prestart-hook
>> @@ -6,6 +6,7 @@ use strict;
>>   use warnings;
>>   
>>   use Fcntl qw(O_DIRECTORY :mode);
>> +use File::Basename;
>>   use File::Path;
>>   use POSIX;
>>   
>> @@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>>       # Delete any leftover reboot-trigger file
>>       unlink("/var/lib/lxc/$vmid/reboot");
>>   
>> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";
> ^ It's now technically possible for a startup during a package upgrade
> to have issues if we move this since in *theory* the prestart hook could
> run with the old package code and the autodev hook with the new package.
>
> Since the pre-start hook unlinks the file, we *could* potentially
> fallback to that path in the autodev hook to catch this case...
> But then we should keep `unlink`ing on the old path for a while to make
> sure we don't fall back to an old leftover file.
>
>> -    unlink $devlist_file;
>>       my $devices = [];
>>   
>> -    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>> +    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>>   
>>       # Unmount first when the user mounted the container with "pct mount".
>>       eval {




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-11-17 10:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-16 11:50 [pve-devel] [PATCH v5 container] Add device passthrough Filip Schauer
2023-11-16 13:35 ` Wolfgang Bumiller
2023-11-17 10:29   ` Filip Schauer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal