From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by lists.proxmox.com (Postfix) with ESMTPS id 4D6389A132 for ; Fri, 17 Nov 2023 11:28:33 +0100 (CET) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 338B21FACF for ; Fri, 17 Nov 2023 11:28:33 +0100 (CET) Received: from proxmox-new.maurer-it.com (proxmox-new.maurer-it.com [94.136.29.106]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by firstgate.proxmox.com (Proxmox) with ESMTPS for ; Fri, 17 Nov 2023 11:28:32 +0100 (CET) Received: from proxmox-new.maurer-it.com (localhost.localdomain [127.0.0.1]) by proxmox-new.maurer-it.com (Proxmox) with ESMTP id BFCE643D57 for ; Fri, 17 Nov 2023 11:28:31 +0100 (CET) From: Filip Schauer To: pve-devel@lists.proxmox.com Date: Fri, 17 Nov 2023 11:28:16 +0100 Message-Id: <20231117102816.37663-1-f.schauer@proxmox.com> X-Mailer: git-send-email 2.39.2 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-SPAM-LEVEL: Spam detection results: 0 AWL -0.222 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record T_SCC_BODY_TEXT_LINE -0.01 - URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more information. 
[lxc.pm, mount.auto, tools.pm, config.pm] Subject: [pve-devel] [PATCH v6 container] Add device passthrough X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 17 Nov 2023 10:28:33 -0000 Add a dev[n] argument to the container config to pass devices through to a container. A device can be passed by its path. Additionally, the access mode, uid and gid can be specified through their respective properties. Signed-off-by: Filip Schauer --- Changes since v5: * Add error message for when a device node exists but stat fails in sub update_lxc_config * Return undef if $noerr is set in sub parse_device * Handle potentially leftover device list file from an old pve-container version * Rename "mount" device list to "mounts" so it is consistent with the plural of "devices" Changes since v4: * Rename device lists to "mount" and "devices" respectively and move them into the tmpfs mounted to the passthrough directory * Add detailed $! 
error messages * Enforce stricter config formatting on passthrough devices * Combine regex in verify_lxc_dev_string and describe what it does in a comment * Remove unnecessary int() in map_ct_id_to_host since Perl automatically parses a string as a number when compared to a number * Cosmetic changes (foreach --> for, unless --> if) src/PVE/LXC.pm | 57 ++++++++++++++++++++++++- src/PVE/LXC/Config.pm | 87 +++++++++++++++++++++++++++++++++++++++ src/PVE/LXC/Tools.pm | 30 +++++++++++--- src/lxc-pve-autodev-hook | 20 +++++++-- src/lxc-pve-prestart-hook | 66 +++++++++++++++++++++++++++-- 5 files changed, 246 insertions(+), 14 deletions(-) diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm index 8f53b53..259fcb2 100644 --- a/src/PVE/LXC.pm +++ b/src/PVE/LXC.pm @@ -4,8 +4,8 @@ use strict; use warnings; use Cwd qw(); -use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST); -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY); +use Errno qw(ELOOP ENOENT ENOTDIR EROFS ECONNREFUSED EEXIST); +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode); use File::Path; use File::Spec; use IO::Poll qw(POLLIN POLLHUP); @@ -639,6 +639,29 @@ sub update_lxc_config { $raw .= "lxc.mount.auto = sys:mixed\n"; } + PVE::LXC::Config->foreach_passthrough_device($conf, sub { + my ($key, $device) = @_; + + die "Path is not defined for passthrough device $key" + unless (defined($device->{path})); + + my $absolute_path = $device->{path}; + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; + + die "Device $absolute_path does not exist\n" if $! == ENOENT; + + die "Error accessing device $absolute_path\n" + if (!defined($mode) || !defined($rdev)); + + die "$absolute_path is not a device\n" + if (!S_ISBLK($mode) && !S_ISCHR($mode)); + + my $major = PVE::Tools::dev_t_major($rdev); + my $minor = PVE::Tools::dev_t_minor($rdev); + my $device_type_char = S_ISBLK($mode) ? 
'b' : 'c'; + $raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n"; + }); + # WARNING: DO NOT REMOVE this without making sure that loop device nodes # cannot be exposed to the container with r/w access (cgroup perms). # When this is enabled mounts will still remain in the monitor's namespace @@ -1344,6 +1367,8 @@ sub check_ct_modify_config_perm { $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']); check_bridge_access($rpcenv, $authuser, $oldconf->{$opt}) if $oldconf->{$opt}; check_bridge_access($rpcenv, $authuser, $newconf->{$opt}) if $newconf->{$opt}; + } elsif ($opt =~ m/^dev\d+$/) { + raise_perm_exc("configuring device passthrough is only allowed for root\@pam"); } elsif ($opt eq 'nameserver' || $opt eq 'searchdomain' || $opt eq 'hostname') { $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Network']); } elsif ($opt eq 'features') { @@ -2393,6 +2418,34 @@ sub validate_id_maps { } } +sub map_ct_id_to_host { + my ($id, $id_map, $id_type) = @_; + + for my $mapping (@$id_map) { + my ($type, $ct, $host, $length) = @$mapping; + + next if ($type ne $id_type); + + if ($id >= $ct && $id < ($ct + $length)) { + return $host - $ct + $id; + } + } + + return $id; +} + +sub map_ct_uid_to_host { + my ($uid, $id_map) = @_; + + return map_ct_id_to_host($uid, $id_map, 'u'); +} + +sub map_ct_gid_to_host { + my ($gid, $id_map) = @_; + + return map_ct_id_to_host($gid, $id_map, 'g'); +} + sub userns_command { my ($id_map) = @_; if (@$id_map) { diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm index 56e1f10..e5accfb 100644 --- a/src/PVE/LXC/Config.pm +++ b/src/PVE/LXC/Config.pm @@ -29,6 +29,7 @@ mkdir $lockdir; mkdir "/etc/pve/nodes/$nodename/lxc"; my $MAX_MOUNT_POINTS = 256; my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS; +my $MAX_DEVICES = 256; # BEGIN implemented abstract methods from PVE::AbstractConfig @@ -908,6 +909,63 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { } } 
+PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string); +sub verify_lxc_dev_string { + my ($dev, $noerr) = @_; + + # do not allow /./ or /../ or /.$ or /..$ + # enforce /dev/ at the beginning + + if ( + $dev =~ m@/\.\.?(?:/|$)@ || + $dev !~ m!^/dev/! + ) { + return undef if $noerr; + die "$dev is not a valid device path\n"; + } + + return $dev; +} + +my $dev_desc = { + path => { + optional => 1, + type => 'string', + default_key => 1, + format => 'pve-lxc-dev-string', + format_description => 'Path', + description => 'Device to pass through to the container', + verbose_description => 'Path to the device to pass through to the container', + }, + mode => { + optional => 1, + type => 'string', + pattern => '0[0-7]{3}', + format_description => 'Octal access mode', + description => 'Access mode to be set on the device node', + }, + uid => { + optional => 1, + type => 'integer', + minimum => 0, + description => 'User ID to be assigned to the device node', + }, + gid => { + optional => 1, + type => 'integer', + minimum => 0, + description => 'Group ID to be assigned to the device node', + }, +}; + +for (my $i = 0; $i < $MAX_DEVICES; $i++) { + $confdesc->{"dev$i"} = { + optional => 1, + type => 'string', format => $dev_desc, + description => "Device to pass through to the container", + } +} + sub parse_pct_config { my ($filename, $raw, $strict) = @_; @@ -1255,6 +1313,23 @@ sub parse_volume { return; } +sub parse_device { + my ($class, $device_string, $noerr) = @_; + + my $res = eval { PVE::JSONSchema::parse_property_string($dev_desc, $device_string) }; + if ($@) { + return undef if $noerr; + die $@; + } + + if (!defined($res->{path})) { + return undef if $noerr; + die "Path has to be defined\n"; + } + + return $res; +} + sub print_volume { my ($class, $key, $volume) = @_; @@ -1762,4 +1837,16 @@ sub get_derived_property { } } +sub foreach_passthrough_device { + my ($class, $conf, $func, @param) = @_; + + for my $key (keys %$conf) { + next if $key !~ 
m/^dev(\d+)$/; + + my $device = $class->parse_device($conf->{$key}); + + $func->($key, $device, @param); + } +} + 1; diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm index 62cdbc1..0ad6eba 100644 --- a/src/PVE/LXC/Tools.pm +++ b/src/PVE/LXC/Tools.pm @@ -81,10 +81,9 @@ sub lxc_hook($$&) { $code->($ct_name, $common_vars, $namespaces, $args); } -sub for_current_devices($&) { - my ($vmid, $code) = @_; +sub for_devices { + my ($devlist_file, $vmid, $code) = @_; - my $devlist_file = "/var/lib/lxc/$vmid/devices"; my $fd; if (! open $fd, '<', $devlist_file) { @@ -93,8 +92,8 @@ sub for_current_devices($&) { } while (defined(my $line = <$fd>)) { - if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) { - warn "invalid .pve-devices entry: $line\n"; + if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) { + warn "invalid $devlist_file entry: $line\n"; next; } @@ -117,6 +116,27 @@ sub for_current_devices($&) { close $fd; } +sub for_current_passthrough_mounts($&) { + my ($vmid, $code) = @_; + + my $devlist_file = "/var/lib/lxc/$vmid/passthrough/mounts"; + + if (-e $devlist_file) { + for_devices($devlist_file, $vmid, $code); + } else { + # Fallback to the old device list file in case a package upgrade + # occurs between lxc-pve-prestart-hook and now. 
+ for_devices("/var/lib/lxc/$vmid/devices", $vmid, $code); + } +} + +sub for_current_passthrough_devices($&) { + my ($vmid, $code) = @_; + + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough/devices"; + for_devices($passthrough_devlist_file, $vmid, $code); +} + sub cgroup_do_write($$) { my ($path, $value) = @_; my $fd; diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook index 3c45949..e860fef 100755 --- a/src/lxc-pve-autodev-hook +++ b/src/lxc-pve-autodev-hook @@ -3,18 +3,32 @@ use strict; use warnings; -use File::Path; +use Fcntl qw(S_IFREG); use File::Basename; +use File::Path; use PVE::LXC::Tools; -use PVE::Tools; +use PVE::Tools qw(MS_BIND); PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub { my ($vmid, $vars, undef, undef) = @_; my $root = $vars->{ROOTFS_MOUNT}; - PVE::LXC::Tools::for_current_devices($vmid, sub { + PVE::LXC::Tools::for_current_passthrough_devices($vmid, sub { + my ($type, $major, $minor, $dev) = @_; + + my $rel_devpath = "/dev/$dev"; + my $rel_dir = dirname($rel_devpath); + File::Path::mkpath("$root/$rel_dir"); + PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0) + or die("Could not mknod $root/dev/$dev: $!\n"); + + PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, MS_BIND, 0) + or die("Bind mount of device $dev into container failed: $!\n"); + }); + + PVE::LXC::Tools::for_current_passthrough_mounts($vmid, sub { my ($type, $major, $minor, $dev) = @_; my $rel_devpath = "/dev/$dev"; diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook index 936d0bf..438ffad 100755 --- a/src/lxc-pve-prestart-hook +++ b/src/lxc-pve-prestart-hook @@ -6,6 +6,7 @@ use strict; use warnings; use Fcntl qw(O_DIRECTORY :mode); +use File::Basename; use File::Path; use POSIX; @@ -58,11 +59,13 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { # Delete any leftover reboot-trigger file unlink("/var/lib/lxc/$vmid/reboot"); - my $devlist_file = "/var/lib/lxc/$vmid/devices"; - unlink $devlist_file; + # Delete 
the old device list file + # in case it was left over from a previous version of pve-container. + unlink("/var/lib/lxc/$vmid/devices"); + my $devices = []; - my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf); + my ($id_map, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf); # Unmount first when the user mounted the container with "pct mount". eval { @@ -118,6 +121,49 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint); + # Device passthrough + my $passthrough_devices = []; + + my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough"; + File::Path::make_path($passthrough_dir); + PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k") + or die ("Could not mount tmpfs for device passthrough at $passthrough_dir: $!"); + + my $setup_passthrough_device = sub { + my ($key, $device) = @_; + + my $absolute_path = $device->{path}; + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; + + die "Could not get mode or device ID of $absolute_path\n" + if (!defined($mode) || !defined($rdev)); + + my $passthrough_device_path = $passthrough_dir . 
$absolute_path; + File::Path::make_path(dirname($passthrough_device_path)); + PVE::Tools::mknod($passthrough_device_path, $mode, $rdev) + or die("failed to mknod $passthrough_device_path: $!\n"); + + # Use chmod because umask could mess with the access mode on mknod + my $passthrough_mode = 0660; + $passthrough_mode = oct($device->{mode}) if defined($device->{mode}); + chmod $passthrough_mode, $passthrough_device_path + or die "failed to chmod $passthrough_mode $passthrough_device_path: $!\n"; + + # Set uid and gid of the device node + my $uid = 0; + my $gid = 0; + $uid = $device->{uid} if defined($device->{uid}); + $gid = $device->{gid} if defined($device->{gid}); + $uid = PVE::LXC::map_ct_uid_to_host($uid, $id_map); + $gid = PVE::LXC::map_ct_gid_to_host($gid, $id_map); + chown $uid, $gid, $passthrough_device_path + or die("failed to chown $uid:$gid $passthrough_device_path: $!\n"); + + push @$passthrough_devices, [$absolute_path, $mode, $rdev]; + }; + + PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device); + my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir); $lxc_setup->pre_start_hook(); @@ -138,7 +184,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { my $minor = PVE::Tools::dev_t_minor($rdev); $devlist .= "b:$major:$minor:$dev\n"; } - PVE::Tools::file_set_contents($devlist_file, $devlist); + PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/mounts", $devlist); + } + + if (@$passthrough_devices) { + my $devlist = ''; + for my $dev (@$passthrough_devices) { + my ($path, $mode, $rdev) = @$dev; + my $major = PVE::Tools::dev_t_major($rdev); + my $minor = PVE::Tools::dev_t_minor($rdev); + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; + $devlist .= "$device_type_char:$major:$minor:$path\n"; + } + PVE::Tools::file_set_contents("/var/lib/lxc/$vmid/passthrough/devices", $devlist); } }); -- 2.39.2