all lists on lists.proxmox.com
 help / color / mirror / Atom feed
* [pve-devel] [PATCH v6 container] Add device passthrough
@ 2023-11-17 10:28 Filip Schauer
  2023-11-20 13:33 ` [pve-devel] applied: " Wolfgang Bumiller
  0 siblings, 1 reply; 2+ messages in thread
From: Filip Schauer @ 2023-11-17 10:28 UTC (permalink / raw)
  To: pve-devel

Add a dev[n] argument to the container config to pass devices through to
a container. A device can be passed by its path. Additionally the access
mode, uid and gid can be specified through their respective properties.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
Changes since v5:
* Add error message for when a device node exists but stat fails
  in sub update_lxc_config
* Return undef if $noerr is set in sub parse_device
* Handle potentially leftover device list file
  from an old pve-container version
* Rename "mount" device list to "mounts" so it is consistent
  with the plural of "devices"

Changes since v4:
* Rename device lists to "mount" and "devices" respectively
  and move them into the tmpfs mounted to the passthrough directory
* Add detailed $! error messages
* Enforce stricter config formatting on passthrough devices
* Combine regex in verify_lxc_dev_string and describe what it does in
  a comment
* Remove unnecessary int() in map_ct_id_to_host since Perl automatically
  parses a string as a number when compared to a number
* Cosmetic changes (foreach --> for, unless --> if)

 src/PVE/LXC.pm            | 57 ++++++++++++++++++++++++-
 src/PVE/LXC/Config.pm     | 87 +++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Tools.pm      | 30 +++++++++++---
 src/lxc-pve-autodev-hook  | 20 +++++++--
 src/lxc-pve-prestart-hook | 66 +++++++++++++++++++++++++++--
 5 files changed, 246 insertions(+), 14 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 8f53b53..259fcb2 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -4,8 +4,8 @@ use strict;
 use warnings;
 
 use Cwd qw();
-use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
+use Errno qw(ELOOP ENOENT ENOTDIR EROFS ECONNREFUSED EEXIST);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
 use File::Path;
 use File::Spec;
 use IO::Poll qw(POLLIN POLLHUP);
@@ -639,6 +639,29 @@ sub update_lxc_config {
 	$raw .= "lxc.mount.auto = sys:mixed\n";
     }
 
+    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
+	my ($key, $device) = @_;
+
+	die "Path is not defined for passthrough device $key"
+	    unless (defined($device->{path}));
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Device $absolute_path does not exist\n" if $! == ENOENT;
+
+	die "Error accessing device $absolute_path\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	die "$absolute_path is not a device\n"
+	    if (!S_ISBLK($mode) && !S_ISCHR($mode));
+
+	my $major = PVE::Tools::dev_t_major($rdev);
+	my $minor = PVE::Tools::dev_t_minor($rdev);
+	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
+    });
+
     # WARNING: DO NOT REMOVE this without making sure that loop device nodes
     # cannot be exposed to the container with r/w access (cgroup perms).
     # When this is enabled mounts will still remain in the monitor's namespace
@@ -1344,6 +1367,8 @@ sub check_ct_modify_config_perm {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	    check_bridge_access($rpcenv, $authuser, $oldconf->{$opt}) if $oldconf->{$opt};
 	    check_bridge_access($rpcenv, $authuser, $newconf->{$opt}) if $newconf->{$opt};
+	} elsif ($opt =~ m/^dev\d+$/) {
+	    raise_perm_exc("configuring device passthrough is only allowed for root\@pam");
 	} elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' || $opt eq 'hostname') {
 	    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']);
 	} elsif ($opt eq 'features') {
@@ -2393,6 +2418,34 @@ sub validate_id_maps {
     }
 }
 
+sub map_ct_id_to_host {
+    my ($id, $id_map, $id_type) = @_;
+
+    for my $mapping (@$id_map) {
+	my ($type, $ct, $host, $length) = @$mapping;
+
+	next if ($type ne $id_type);
+
+	if ($id >= $ct && $id < ($ct + $length)) {
+	    return $host - $ct + $id;
+	}
+    }
+
+    return $id;
+}
+
+sub map_ct_uid_to_host {
+    my ($uid, $id_map) = @_;
+
+    return map_ct_id_to_host($uid, $id_map, 'u');
+}
+
+sub map_ct_gid_to_host {
+    my ($gid, $id_map) = @_;
+
+    return map_ct_id_to_host($gid, $id_map, 'g');
+}
+
 sub userns_command {
     my ($id_map) = @_;
     if (@$id_map) {
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 56e1f10..e5accfb 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -29,6 +29,7 @@ mkdir $lockdir;
 mkdir "/etc/pve/nodes/$nodename/lxc";
 my $MAX_MOUNT_POINTS = 256;
 my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
+my $MAX_DEVICES = 256;
 
 # BEGIN implemented abstract methods from PVE::AbstractConfig
 
@@ -908,6 +909,63 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
     }
 }
 
+PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string);
+sub verify_lxc_dev_string {
+    my ($dev, $noerr) = @_;
+
+    # do not allow /./ or /../ or /.$ or /..$
+    # enforce /dev/ at the beginning
+
+    if (
+	$dev =~ m@/\.\.?(?:/|$)@ ||
+	$dev !~ m!^/dev/!
+    ) {
+	return undef if $noerr;
+	die "$dev is not a valid device path\n";
+    }
+
+    return $dev;
+}
+
+my $dev_desc = {
+    path => {
+	optional => 1,
+	type => 'string',
+	default_key => 1,
+	format => 'pve-lxc-dev-string',
+	format_description => 'Path',
+	description => 'Device to pass through to the container',
+	verbose_description => 'Path to the device to pass through to the container',
+    },
+    mode => {
+	optional => 1,
+	type => 'string',
+	pattern => '0[0-7]{3}',
+	format_description => 'Octal access mode',
+	description => 'Access mode to be set on the device node',
+    },
+    uid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'User ID to be assigned to the device node',
+    },
+    gid => {
+	optional => 1,
+	type => 'integer',
+	minimum => 0,
+	description => 'Group ID to be assigned to the device node',
+    },
+};
+
+for (my $i = 0; $i < $MAX_DEVICES; $i++) {
+    $confdesc->{"dev$i"} = {
+	optional => 1,
+	type => 'string', format => $dev_desc,
+	description => "Device to pass through to the container",
+    }
+}
+
 sub parse_pct_config {
     my ($filename, $raw, $strict) = @_;
 
@@ -1255,6 +1313,23 @@ sub parse_volume {
     return;
 }
 
+sub parse_device {
+    my ($class, $device_string, $noerr) = @_;
+
+    my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
+    if ($@) {
+	return undef if $noerr;
+	die $@;
+    }
+
+    if (!defined($res->{path})) {
+	return undef if $noerr;
+	die "Path has to be defined\n";
+    }
+
+    return $res;
+}
+
 sub print_volume {
     my ($class, $key, $volume) = @_;
 
@@ -1762,4 +1837,16 @@ sub get_derived_property {
     }
 }
 
+sub foreach_passthrough_device {
+    my ($class, $conf, $func, @param) = @_;
+
+    for my $key (keys %$conf) {
+	next if $key !~ m/^dev(\d+)$/;
+
+	my $device = $class->parse_device($conf->{$key});
+
+	$func->($key, $device, @param);
+    }
+}
+
 1;
diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm
index 62cdbc1..0ad6eba 100644
--- a/src/PVE/LXC/Tools.pm
+++ b/src/PVE/LXC/Tools.pm
@@ -81,10 +81,9 @@ sub lxc_hook($$&) {
     $code->($ct_name, $common_vars, $namespaces, $args);
 }
 
-sub for_current_devices($&) {
-    my ($vmid, $code) = @_;
+sub for_devices {
+    my ($devlist_file, $vmid, $code) = @_;
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
     my $fd;
 
     if (! open $fd, '<', $devlist_file) {
@@ -93,8 +92,8 @@ sub for_current_devices($&) {
     }
 
     while (defined(my $line = <$fd>)) {
-	if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) {
-	    warn "invalid .pve-devices entry: $line\n";
+	if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) {
+	    warn "invalid $devlist_file entry: $line\n";
 	    next;
 	}
 
@@ -117,6 +116,27 @@ sub for_current_devices($&) {
     close $fd;
 }
 
+sub for_current_passthrough_mounts($&) {
+    my ($vmid, $code) = @_;
+
+    my $devlist_file = "/var/lib/lxc/$vmid/passthrough/mounts";
+
+    if (-e $devlist_file) {
+	for_devices($devlist_file, $vmid, $code);
+    } else {
+	# Fallback to the old device list file in case a package upgrade
+	# occurs between lxc-pve-prestart-hook and now.
+	for_devices("/var/lib/lxc/$vmid/devices", $vmid, $code);
+    }
+}
+
+sub for_current_passthrough_devices($&) {
+    my ($vmid, $code) = @_;
+
+    my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough/devices";
+    for_devices($passthrough_devlist_file, $vmid, $code);
+}
+
 sub cgroup_do_write($$) {
     my ($path, $value) = @_;
     my $fd;
diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
index 3c45949..e860fef 100755
--- a/src/lxc-pve-autodev-hook
+++ b/src/lxc-pve-autodev-hook
@@ -3,18 +3,32 @@
 use strict;
 use warnings;
 
-use File::Path;
+use Fcntl qw(S_IFREG);
 use File::Basename;
+use File::Path;
 
 use PVE::LXC::Tools;
-use PVE::Tools;
+use PVE::Tools qw(MS_BIND);
 
 PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
     my ($vmid, $vars, undef, undef) = @_;
 
     my $root = $vars->{ROOTFS_MOUNT};
 
-    PVE::LXC::Tools::for_current_devices($vmid, sub {
+    PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub {
+	my ($type, $major, $minor, $dev) = @_;
+
+	my $rel_devpath = "/dev/$dev";
+	my $rel_dir = dirname($rel_devpath);
+	File::Path::mkpath("$root/$rel_dir");
+	PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0)
+	    or die("Could not mknod $root/dev/$dev: $!\n");
+
+	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0)
+	    or die("Bind mount of device $dev into container failed: $!\n");
+    });
+
+    PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub {
 	my ($type, $major, $minor, $dev) = @_;
 
 	my $rel_devpath = "/dev/$dev";
diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index 936d0bf..438ffad 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -6,6 +6,7 @@ use strict;
 use warnings;
 
 use Fcntl qw(O_DIRECTORY :mode);
+use File::Basename;
 use File::Path;
 use POSIX;
 
@@ -58,11 +59,13 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
     # Delete any leftover reboot-trigger file
     unlink("/var/lib/lxc/$vmid/reboot");
 
-    my $devlist_file = "/var/lib/lxc/$vmid/devices";
-    unlink $devlist_file;
+    # Delete the old device list file
+    # in case it was left over from a previous version of pve-container.
+    unlink("/var/lib/lxc/$vmid/devices");
+
     my $devices = [];
 
-    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+    my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
 
     # Unmount first when the user mounted the container with "pct mount".
     eval {
@@ -118,6 +121,49 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 
     PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);
 
+    # Device passthrough
+    my $passthrough_devices = [];
+
+    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
+    File::Path::make_path($passthrough_dir);
+    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k")
+        or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!");
+
+    my $setup_passthrough_device = sub {
+	my ($key, $device) = @_;
+
+	my $absolute_path = $device->{path};
+	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
+
+	die "Could not get mode or device ID of $absolute_path\n"
+	    if (!defined($mode) || !defined($rdev));
+
+	my $passthrough_device_path = $passthrough_dir . $absolute_path;
+	File::Path::make_path(dirname($passthrough_device_path));
+	PVE::Tools::mknod($passthrough_device_path, $mode, $rdev)
+	    or die("failed to mknod $passthrough_device_path: $!\n");
+
+	# Use chmod because umask could mess with the access mode on mknod
+	my $passthrough_mode = 0660;
+	$passthrough_mode = oct($device->{mode}) if defined($device->{mode});
+	chmod $passthrough_mode, $passthrough_device_path
+	    or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n";
+
+	# Set uid and gid of the device node
+	my $uid = 0;
+	my $gid = 0;
+	$uid = $device->{uid} if defined($device->{uid});
+	$gid = $device->{gid} if defined($device->{gid});
+	$uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map);
+	$gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map);
+	chown $uid, $gid, $passthrough_device_path
+	    or die("failed to chown $uid:$gid $passthrough_device_path: $!\n");
+
+	push @$passthrough_devices, [$absolute_path, $mode, $rdev];
+    };
+
+    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);
+
     my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
     $lxc_setup->pre_start_hook();
 
@@ -138,7 +184,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 	    my $minor = PVE::Tools::dev_t_minor($rdev);
 	    $devlist .= "b:$major:$minor:$dev\n";
 	}
-	PVE::Tools::file_set_contents($devlist_file, $devlist);
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mounts", $devlist);
+    }
+
+    if (@$passthrough_devices) {
+	my $devlist = '';
+	for my $dev (@$passthrough_devices) {
+	    my ($path, $mode, $rdev) = @$dev;
+	    my $major = PVE::Tools::dev_t_major($rdev);
+	    my $minor = PVE::Tools::dev_t_minor($rdev);
+	    my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
+	    $devlist .= "$device_type_char:$major:$minor:$path\n";
+	}
+	PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist);
     }
 });
 
-- 
2.39.2





^ permalink raw reply	[flat|nested] 2+ messages in thread

* [pve-devel] applied: [PATCH v6 container] Add device passthrough
  2023-11-17 10:28 [pve-devel] [PATCH v6 container] Add device passthrough Filip Schauer
@ 2023-11-20 13:33 ` Wolfgang Bumiller
  0 siblings, 0 replies; 2+ messages in thread
From: Wolfgang Bumiller @ 2023-11-20 13:33 UTC (permalink / raw)
  To: Filip Schauer; +Cc: pve-devel

applied, thanks




^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-11-20 13:33 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-17 10:28 [pve-devel] [PATCH v6 container] Add device passthrough Filip Schauer
2023-11-20 13:33 ` [pve-devel] applied: " Wolfgang Bumiller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal