public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
* [pve-devel] [PATCH v5 container] Add device passthrough
@ 2023-11-16 11:50 Filip Schauer
  2023-11-16 13:35 ` Wolfgang Bumiller
  0 siblings, 1 reply; 3+ messages in thread
From: Filip Schauer @ 2023-11-16 11:50 UTC (permalink / raw)
  To: pve-devel

Add a dev[n] argument to the container config to pass devices through to
a container. A device can be passed by its path. Additionally the access
mode, uid and gid can be specified through their respective properties.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
Changes since v4:
* Rename device lists to "mounts" and "devices" respectively
  and move them into the tmpfs mounted to the passthrough directory
* Add detailed $! error messages
* Enforce stricter config formatting on passthrough devices
* Combine regex in verify_lxc_dev_string and describe what it does in
  a comment
* Remove unnecessary int() in map_ct_id_to_host since Perl automatically
  parses a string as a number when compared to a number
* Cosmetic changes (foreach --> for, unless --> if)

 src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
 src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Tools.pm      | 23 ++++++++---
 src/lxc-pve-autodev-hook  | 20 ++++++++--
 src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
 5 files changed, 229 insertions(+), 13 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 8f53b53..98eb909 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -5,7 +5,7 @@ use warnings;
 
 use Cwd qw();
 use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
 use File::Path;
 use File::Spec;
 use IO::Poll qw(POLLIN POLLHUP);
@@ -639,6 +639,27 @@ sub update_lxc_config {
 	$raw .= "lxc.mount.auto = sys:mixed\n";
     }
 
+    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
+	my ($key, $device) = @_;
+
+	die "Path is not defined for passthrough device $key"
+	    unless (defined($device->{path}));
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Device $absolute_path does not exist\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	die "$absolute_path is not a device\n"
+	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
+
+	my $major = PVE::Tools::dev_t_major($rdev);
+	my $minor = PVE::Tools::dev_t_minor($rdev);
+	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
+    });
+
     # WARNING: DO NOT REMOVE this without making sure that loop device nodes
     # cannot be exposed to the container with r/w access (cgroup perms).
     # When this is enabled mounts will still remain in the monitor's namespace
@@ -1344,6 +1365,8 @@ sub check_ct_modify_config_perm {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	    check_bridge_access($rpcenv, $authuser, $oldconf->{$opt}) if $oldconf->{$opt};
 	    check_bridge_access($rpcenv, $authuser, $newconf->{$opt}) if $newconf->{$opt};
+	} elsif ($opt =~ m/^dev\d+$/) {
+	    raise_perm_exc("configuring device passthrough is only allowed for root\@pam");
 	} elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' || $opt eq 'hostname') {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	} elsif ($opt eq 'features') {
@@ -2393,6 +2416,34 @@ sub validate_id_maps {
     }
 }
 
+sub map_ct_id_to_host {
+    my ($id, $id_map, $id_type) = @_;
+
+    for my $mapping (@$id_map) {
+	my ($type, $ct, $host, $length) = @$mapping;
+
+	next if ($type ne $id_type);
+
+	if ($id >= $ct && $id < ($ct + $length)) {
+	    return $host - $ct + $id;
+	}
+    }
+
+    return $id;
+}
+
+sub map_ct_uid_to_host {
+    my ($uid, $id_map) = @_;
+
+    return map_ct_id_to_host($uid, $id_map, 'u');
+}
+
+sub map_ct_gid_to_host {
+    my ($gid, $id_map) = @_;
+
+    return map_ct_id_to_host($gid, $id_map, 'g');
+}
+
 sub userns_command {
     my ($id_map) = @_;
     if (@$id_map) {
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 56e1f10..4feae20 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -29,6 +29,7 @@ mkdir $lockdir;
 mkdir "/etc/pve/nodes/$nodename/lxc";
 my $MAX_MOUNT_POINTS = 256;
 my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
+my $MAX_DEVICES = 256;
 
 # BEGIN implemented abstract methods from PVE::AbstractConfig
 
@@ -908,6 +909,63 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
     }
 }
 
+PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string);
+sub verify_lxc_dev_string {
+    my ($dev, $noerr) = @_;
+
+    # do not allow /./ or /../ or /.$ or /..$
+    # enforce /dev/ at the beginning
+
+    if (
+	$dev =~ m@/\.\.?(?:/|$)@ ||
+	$dev !~ m!^/dev/!
+    ) {
+	return undef if $noerr;
+	die "$dev is not a valid device path\n";
+    }
+
+    return $dev;
+}
+
+my $dev_desc = {
+    path => {
+	optional => 1,
+	type => 'string',
+	default_key => 1,
+	format => 'pve-lxc-dev-string',
+	format_description => 'Path',
+	description => 'Device to pass through to the container',
+	verbose_description => 'Path to the device to pass through to the container',
+    },
+    mode => {
+	optional => 1,
+	type => 'string',
+	pattern => '0[0-7]{3}',
+	format_description => 'Octal access mode',
+	description => 'Access mode to be set on the device node',
+    },
+    uid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'User ID to be assigned to the device node',
+    },
+    gid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'Group ID to be assigned to the device node',
+    },
+};
+
+for (my $i = 0; $i < $MAX_DEVICES; $i++) {
+    $confdesc->{"dev$i"} = {
+	optional => 1,
+	type => 'string', format => $dev_desc,
+	description => "Device to pass through to the container",
+    }
+}
+
 sub parse_pct_config {
     my ($filename, $raw, $strict) = @_;
 
@@ -1255,6 +1313,20 @@ sub parse_volume {
     return;
 }
 
+sub parse_device {
+    my ($class, $device_string, $noerr) = @_;
+
+    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
+    if ($@) {
+	return undef if $noerr;
+	die $@;
+    }
+
+    die "Path has to be defined" if (!$noerr && !defined($res->{path}));
+
+    return $res;
+}
+
 sub print_volume {
     my ($class, $key, $volume) = @_;
 
@@ -1762,4 +1834,16 @@ sub get_derived_property {
     }
 }
 
+sub foreach_passthrough_device {
+    my ($class, $conf, $func, @param) = @_;
+
+    for my $key (keys %$conf) {
+	next if $key !~ m/^dev(\d+)$/;
+
+	my $device = $class->parse_device($conf->{$key});
+
+	$func->($key, $device, @param);
+    }
+}
+
 1;
diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm
index 62cdbc1..e9262f2 100644
--- a/src/PVE/LXC/Tools.pm
+++ b/src/PVE/LXC/Tools.pm
@@ -81,10 +81,9 @@ sub lxc_hook($$&) {
     $code->($ct_name, $common_vars, $namespaces, $args);
 }
 
-sub for_current_devices($&) {
-    my ($vmid, $code) = @_;
+sub for_devices {
+    my ($devlist_file, $vmid, $code) = @_;
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
     my $fd;
 
     if (! open $fd, '<', $devlist_file) {
@@ -93,8 +92,8 @@ sub for_current_devices($&) {
     }
 
     while (defined(my $line = <$fd>)) {
-	if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) {
-	    warn "invalid .pve-devices entry: $line\n";
+	if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) {
+	    warn "invalid $devlist_file entry: $line\n";
 	    next;
 	}
 
@@ -117,6 +116,20 @@ sub for_current_devices($&) {
     close $fd;
 }
 
+sub for_current_passthrough_mounts($&) {
+    my ($vmid, $code) = @_;
+
+    my $devlist_file = "/var/lib/lxc/$vmid/passthrough/mount";
+    for_devices($devlist_file, $vmid, $code);
+}
+
+sub for_current_passthrough_devices($&) {
+    my ($vmid, $code) = @_;
+
+    my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough/devices";
+    for_devices($passthrough_devlist_file, $vmid, $code);
+}
+
 sub cgroup_do_write($$) {
     my ($path, $value) = @_;
     my $fd;
diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
index 3c45949..e860fef 100755
--- a/src/lxc-pve-autodev-hook
+++ b/src/lxc-pve-autodev-hook
@@ -3,18 +3,32 @@
 use strict;
 use warnings;
 
-use File::Path;
+use Fcntl qw(S_IFREG);
 use File::Basename;
+use File::Path;
 
 use PVE::LXC::Tools;
-use PVE::Tools;
+use PVE::Tools qw(MS_BIND);
 
 PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
     my ($vmid, $vars, undef, undef) = @_;
 
     my $root = $vars->{ROOTFS_MOUNT};
 
-    PVE::LXC::Tools::for_current_devices($vmid, sub {
+    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
+	my ($type, $major, $minor, $dev) = @_;
+
+	my $rel_devpath = "/dev/$dev";
+	my $rel_dir = dirname($rel_devpath);
+	File::Path::mkpath("$root/$rel_dir");
+	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
+	    or die("Could not mknod $root/dev/$dev: $!\n");
+
+	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
+	    or die("Bind mount of device $dev into container failed: $!\n");
+    });
+
+    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
 	my ($type, $major, $minor, $dev) = @_;
 
 	my $rel_devpath = "/dev/$dev";
diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index 936d0bf..f0cc08d 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -6,6 +6,7 @@ use strict;
 use warnings;
 
 use Fcntl qw(O_DIRECTORY :mode);
+use File::Basename;
 use File::Path;
 use POSIX;
 
@@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
     # Delete any leftover reboot-trigger file
     unlink("/var/lib/lxc/$vmid/reboot");
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
-    unlink $devlist_file;
     my $devices = [];
 
-    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
 
     # Unmount first when the user mounted the container with "pct mount".
     eval {
@@ -118,6 +117,49 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 
     PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);
 
+    # Device passthrough
+    my $passthrough_devices = [];
+
+    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
+    File::Path::make_path($passthrough_dir);
+    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
+        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");
+
+    my $setup_passthrough_device = sub {
+	my ($key, $device) = @_;
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Could not get mode or device ID of $absolute_path\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	my $passthrough_device_path = $passthrough_dir . $absolute_path;
+	File::Path::make_path(dirname($passthrough_device_path));
+	PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
+	    or die("failed to mknod $passthrough_device_path: $!\n");
+
+	# Use chmod because umask could mess with the access mode on mknod
+	my $passthrough_mode = 0660;
+	$passthrough_mode = oct($device->{mode}) if defined($device->{mode});
+	chmod $passthrough_mode, $passthrough_device_path
+	    or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n";
+
+	# Set uid and gid of the device node
+	my $uid = 0;
+	my $gid = 0;
+	$uid = $device->{uid} if defined($device->{uid});
+	$gid = $device->{gid} if defined($device->{gid});
+	$uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
+	$gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
+	chown $uid, $gid, $passthrough_device_path
+	    or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");
+
+	push @$passthrough_devices, [$absolute_path, $mode, $rdev];
+    };
+
+    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);
+
     my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
     $lxc_setup->pre_start_hook();
 
@@ -138,7 +180,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 	    my $minor = PVE::Tools::dev_t_minor($rdev);
 	    $devlist .= "b:$major:$minor:$dev\n";
 	}
-	PVE::Tools::file_set_contents($devlist_file, $devlist);
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mount", $devlist);
+    }
+
+    if (@$passthrough_devices) {
+	my $devlist = '';
+	for my $dev (@$passthrough_devices) {
+	    my ($path, $mode, $rdev) = @$dev;
+	    my $major = PVE::Tools::dev_t_major($rdev);
+	    my $minor = PVE::Tools::dev_t_minor($rdev);
+	    my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	    $devlist .= "$device_type_char:$major:$minor:$path\n";
+	}
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
     }
 });
 
-- 
2.39.2





^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [pve-devel] [PATCH v5 container] Add device passthrough
  2023-11-16 11:50 [pve-devel] [PATCH v5 container] Add device passthrough Filip Schauer
@ 2023-11-16 13:35 ` Wolfgang Bumiller
  2023-11-17 10:29   ` Filip Schauer
  0 siblings, 1 reply; 3+ messages in thread
From: Wolfgang Bumiller @ 2023-11-16 13:35 UTC (permalink / raw)
  To: Filip Schauer; +Cc: pve-devel

On Thu, Nov 16, 2023 at 12:50:44PM +0100, Filip Schauer wrote:
> Add a dev[n] argument to the container config to pass devices through to
> a container. A device can be passed by its path. Additionally the access
> mode, uid and gid can be specified through their respective properties.
> 
> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
> ---
> Changes since v4:
> * Rename device lists to "mounts" and "devices" respectively
>   and move them into the tmpfs mounted to the passthrough directory
> * Add detailed $! error messages
> * Enforce stricter config formatting on passthrough devices
> * Combine regex in verify_lxc_dev_string and describe what it does in
>   a comment
> * Remove unnecessary int() in map_ct_id_to_host since Perl automatically
>   parses a string as a number when compared to a number
> * Cosmetic changes (foreach --> for, unless --> if)
> 
>  src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
>  src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
>  src/PVE/LXC/Tools.pm      | 23 ++++++++---
>  src/lxc-pve-autodev-hook  | 20 ++++++++--
>  src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
>  5 files changed, 229 insertions(+), 13 deletions(-)
> 
> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
> index 8f53b53..98eb909 100644
> --- a/src/PVE/LXC.pm
> +++ b/src/PVE/LXC.pm
> @@ -5,7 +5,7 @@ use warnings;
>  
>  use Cwd qw();
>  use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>  use File::Path;
>  use File::Spec;
>  use IO::Poll qw(POLLIN POLLHUP);
> @@ -639,6 +639,27 @@ sub update_lxc_config {
>  	$raw .= "lxc.mount.auto = sys:mixed\n";
>      }
>  
> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
> +	my ($key, $device) = @_;
> +
> +	die "Path is not defined for passthrough device $key"
> +	    unless (defined($device->{path}));
> +
> +	my $absolute_path = $device->{path};
> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
> +
> +	die "Device $absolute_path does not exist\n" 
> +	    if (!defined($mode) || !defined($rdev));

^ The above is only true for ENOENT, either check it explicitly or use
something like "Error accessing device $absolute_path: $!\n".

> +
> +	die "$absolute_path is not a device\n"
> +	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
> +
> +	my $major = PVE::Tools::dev_t_major($rdev);
> +	my $minor = PVE::Tools::dev_t_minor($rdev);
> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
> +    });
> +
>      # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>      # cannot be exposed to the container with r/w access (cgroup perms).
>      # When this is enabled mounts will still remain in the monitor's namespace
> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
> index 56e1f10..4feae20 100644
> --- a/src/PVE/LXC/Config.pm
> +++ b/src/PVE/LXC/Config.pm
> @@ -1255,6 +1313,20 @@ sub parse_volume {
>      return;
>  }
>  
> +sub parse_device {
> +    my ($class, $device_string, $noerr) = @_;
> +
> +    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
> +    if ($@) {
> +	return undef if $noerr;
> +	die $@;
> +    }
> +
> +    die "Path has to be defined" if (!$noerr && !defined($res->{path}));

^ an error with $noerr should still return `undef` - here we'd fall back
to `return $res` on error with $noerr currently, follow the same pattern
as above

> +
> +    return $res;
> +}
> +
>  sub print_volume {
>      my ($class, $key, $volume) = @_;
>  
> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
> index 3c45949..e860fef 100755
> --- a/src/lxc-pve-autodev-hook
> +++ b/src/lxc-pve-autodev-hook
> @@ -3,18 +3,32 @@
>  use strict;
>  use warnings;
>  
> -use File::Path;
> +use Fcntl qw(S_IFREG);
>  use File::Basename;
> +use File::Path;
>  
>  use PVE::LXC::Tools;
> -use PVE::Tools;
> +use PVE::Tools qw(MS_BIND);
>  
>  PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>      my ($vmid, $vars, undef, undef) = @_;
>  
>      my $root = $vars->{ROOTFS_MOUNT};
>  
> -    PVE::LXC::Tools::for_current_devices($vmid, sub {
> +    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
> +	my ($type, $major, $minor, $dev) = @_;
> +
> +	my $rel_devpath = "/dev/$dev";
> +	my $rel_dir = dirname($rel_devpath);
> +	File::Path::mkpath("$root/$rel_dir");
> +	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
> +	    or die("Could not mknod $root/dev/$dev: $!\n");
> +
> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
> +	    or die("Bind mount of device $dev into container failed: $!\n");
> +    });
> +
> +    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
>  	my ($type, $major, $minor, $dev) = @_;
>  
>  	my $rel_devpath = "/dev/$dev";
> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
> index 936d0bf..f0cc08d 100755
> --- a/src/lxc-pve-prestart-hook
> +++ b/src/lxc-pve-prestart-hook
> @@ -6,6 +6,7 @@ use strict;
>  use warnings;
>  
>  use Fcntl qw(O_DIRECTORY :mode);
> +use File::Basename;
>  use File::Path;
>  use POSIX;
>  
> @@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>      # Delete any leftover reboot-trigger file
>      unlink("/var/lib/lxc/$vmid/reboot");
>  
> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";

^ It's now technically possible for a startup during a package upgrade
to have issues if we move this since in *theory* the prestart hook could
run with the old package code and the autodev hook with the new package.

Since the pre-start hook unlinks the file, we *could* potentially
fallback to that path in the autodev hook to catch this case...
But then we should keep `unlink`ing on the old path for a while to make
sure we don't fall back to an old leftover file.

> -    unlink $devlist_file;
>      my $devices = [];
>  
> -    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
> +    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>  
>      # Unmount first when the user mounted the container with "pct mount".
>      eval {




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [pve-devel] [PATCH v5 container] Add device passthrough
  2023-11-16 13:35 ` Wolfgang Bumiller
@ 2023-11-17 10:29   ` Filip Schauer
  0 siblings, 0 replies; 3+ messages in thread
From: Filip Schauer @ 2023-11-17 10:29 UTC (permalink / raw)
  To: Wolfgang Bumiller; +Cc: pve-devel

Patch v6 available

https://lists.proxmox.com/pipermail/pve-devel/2023-November/060367.html

On 16/11/2023 14:35, Wolfgang Bumiller wrote:
> On Thu, Nov 16, 2023 at 12:50:44PM +0100, Filip Schauer wrote:
>> Add a dev[n] argument to the container config to pass devices through to
>> a container. A device can be passed by its path. Additionally the access
>> mode, uid and gid can be specified through their respective properties.
>>
>> Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
>> ---
>> Changes since v4:
>> * Rename device lists to "mounts" and "devices" respectively
>>    and move them into the tmpfs mounted to the passthrough directory
>> * Add detailed $! error messages
>> * Enforce stricter config formatting on passthrough devices
>> * Combine regex in verify_lxc_dev_string and describe what it does in
>>    a comment
>> * Remove unnecessary int() in map_ct_id_to_host since Perl automatically
>>    parses a string as a number when compared to a number
>> * Cosmetic changes (foreach --> for, unless --> if)
>>
>>   src/PVE/LXC.pm            | 53 +++++++++++++++++++++++-
>>   src/PVE/LXC/Config.pm     | 84 +++++++++++++++++++++++++++++++++++++++
>>   src/PVE/LXC/Tools.pm      | 23 ++++++++---
>>   src/lxc-pve-autodev-hook  | 20 ++++++++--
>>   src/lxc-pve-prestart-hook | 62 +++++++++++++++++++++++++++--
>>   5 files changed, 229 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
>> index 8f53b53..98eb909 100644
>> --- a/src/PVE/LXC.pm
>> +++ b/src/PVE/LXC.pm
>> @@ -5,7 +5,7 @@ use warnings;
>>   
>>   use Cwd qw();
>>   use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
>> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
>> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>>   use File::Path;
>>   use File::Spec;
>>   use IO::Poll qw(POLLIN POLLHUP);
>> @@ -639,6 +639,27 @@ sub update_lxc_config {
>>   	$raw .= "lxc.mount.auto = sys:mixed\n";
>>       }
>>   
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
>> +	my ($key, $device) = @_;
>> +
>> +	die "Path is not defined for passthrough device $key"
>> +	    unless (defined($device->{path}));
>> +
>> +	my $absolute_path = $device->{path};
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +
>> +	die "Device $absolute_path does not exist\n"
>> +	    if (!defined($mode) || !defined($rdev));
> ^ The above is only true for ENOENT, either check it explicitly or use
> something like "Error accessing device $absolute_path: $!\n".
>
>> +
>> +	die "$absolute_path is not a device\n"
>> +	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
>> +
>> +	my $major = PVE::Tools::dev_t_major($rdev);
>> +	my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
>> +    });
>> +
>>       # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>>       # cannot be exposed to the container with r/w access (cgroup perms).
>>       # When this is enabled mounts will still remain in the monitor's namespace
>> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
>> index 56e1f10..4feae20 100644
>> --- a/src/PVE/LXC/Config.pm
>> +++ b/src/PVE/LXC/Config.pm
>> @@ -1255,6 +1313,20 @@ sub parse_volume {
>>       return;
>>   }
>>   
>> +sub parse_device {
>> +    my ($class, $device_string, $noerr) = @_;
>> +
>> +    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
>> +    if ($@) {
>> +	return undef if $noerr;
>> +	die $@;
>> +    }
>> +
>> +    die "Path has to be defined" if (!$noerr && !defined($res->{path}));
> ^ an error with $noerr should still return `undef` - here we'd fall back
> to `return $res` on error with $noerr currently, follow the same pattern
> as above
>
>> +
>> +    return $res;
>> +}
>> +
>>   sub print_volume {
>>       my ($class, $key, $volume) = @_;
>>   
>> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
>> index 3c45949..e860fef 100755
>> --- a/src/lxc-pve-autodev-hook
>> +++ b/src/lxc-pve-autodev-hook
>> @@ -3,18 +3,32 @@
>>   use strict;
>>   use warnings;
>>   
>> -use File::Path;
>> +use Fcntl qw(S_IFREG);
>>   use File::Basename;
>> +use File::Path;
>>   
>>   use PVE::LXC::Tools;
>> -use PVE::Tools;
>> +use PVE::Tools qw(MS_BIND);
>>   
>>   PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>>       my ($vmid, $vars, undef, undef) = @_;
>>   
>>       my $root = $vars->{ROOTFS_MOUNT};
>>   
>> -    PVE::LXC::Tools::for_current_devices($vmid, sub {
>> +    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
>> +	my ($type, $major, $minor, $dev) = @_;
>> +
>> +	my $rel_devpath = "/dev/$dev";
>> +	my $rel_dir = dirname($rel_devpath);
>> +	File::Path::mkpath("$root/$rel_dir");
>> +	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
>> +	    or die("Could not mknod $root/dev/$dev: $!\n");
>> +
>> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
>> +	    or die("Bind mount of device $dev into container failed: $!\n");
>> +    });
>> +
>> +    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
>>   	my ($type, $major, $minor, $dev) = @_;
>>   
>>   	my $rel_devpath = "/dev/$dev";
>> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
>> index 936d0bf..f0cc08d 100755
>> --- a/src/lxc-pve-prestart-hook
>> +++ b/src/lxc-pve-prestart-hook
>> @@ -6,6 +6,7 @@ use strict;
>>   use warnings;
>>   
>>   use Fcntl qw(O_DIRECTORY :mode);
>> +use File::Basename;
>>   use File::Path;
>>   use POSIX;
>>   
>> @@ -58,11 +59,9 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>>       # Delete any leftover reboot-trigger file
>>       unlink("/var/lib/lxc/$vmid/reboot");
>>   
>> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";
> ^ It's now technically possible for a startup during a package upgrade
> to have issues if we move this since in *theory* the prestart hook could
> run with the old package code and the autodev hook with the new package.
>
> Since the pre-start hook unlinks the file, we *could* potentially
> fallback to that path in the autodev hook to catch this case...
> But then we should keep `unlink`ing on the old path for a while to make
> sure we don't fall back to an old leftover file.
>
>> -    unlink $devlist_file;
>>       my $devices = [];
>>   
>> -    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>> +    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
>>   
>>       # Unmount first when the user mounted the container with "pct mount".
>>       eval {




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-11-17 10:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-16 11:50 [pve-devel] [PATCH v5 container] Add device passthrough Filip Schauer
2023-11-16 13:35 ` Wolfgang Bumiller
2023-11-17 10:29   ` Filip Schauer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal