* [pve-devel] [PATCH v3 many] Add container device passthrough @ 2023-11-07 13:46 Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 common 1/1] tools: Add mknod syscall Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 container 1/1] Add device passthrough Filip Schauer 0 siblings, 2 replies; 6+ messages in thread From: Filip Schauer @ 2023-11-07 13:46 UTC (permalink / raw) To: pve-devel Changes since v1: * mknod the devices in /var/lib/lxc/$vmid/passthrough and setup proper permissions instead of bind mounting the devices from /dev directly * Add support for USB mapping * Add foreach_passthrough_device helper function Changes since v2: * Remove support for USB mapping * Add mknod syscall * Setup a tmpfs to mknod the devices to instead of writing to the host file system * Write passthrough dev list to /var/lib/lxc/$vmid/passthrough_devices * Move sub foreach_passthrough_device out of the AbstractConfig into the container config * Add mode, uid and gid properties to passthrough device config pve-common: Filip Schauer (1): tools: add mknod syscall src/PVE/Syscall.pm | 1 + src/PVE/Tools.pm | 5 +++++ 2 files changed, 6 insertions(+) pve-container: Filip Schauer (1): Add device passthrough src/PVE/LXC.pm | 29 +++++++++++++- src/PVE/LXC/Config.pm | 79 +++++++++++++++++++++++++++++++++++++++ src/PVE/LXC/Tools.pm | 23 +++++++++--- src/lxc-pve-autodev-hook | 14 ++++++- src/lxc-pve-prestart-hook | 63 +++++++++++++++++++++++++++++++ 5 files changed, 201 insertions(+), 7 deletions(-) ^ permalink raw reply [flat|nested] 6+ messages in thread
* [pve-devel] [PATCH v3 common 1/1] tools: Add mknod syscall 2023-11-07 13:46 [pve-devel] [PATCH v3 many] Add container device passthrough Filip Schauer @ 2023-11-07 13:46 ` Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 container 1/1] Add device passthrough Filip Schauer 1 sibling, 0 replies; 6+ messages in thread From: Filip Schauer @ 2023-11-07 13:46 UTC (permalink / raw) To: pve-devel Signed-off-by: Filip Schauer <f.schauer@proxmox.com> --- src/PVE/Syscall.pm | 1 + src/PVE/Tools.pm | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/PVE/Syscall.pm b/src/PVE/Syscall.pm index 4c0b9cf..2a423e8 100644 --- a/src/PVE/Syscall.pm +++ b/src/PVE/Syscall.pm @@ -16,6 +16,7 @@ BEGIN { openat => &SYS_openat, close => &SYS_close, mkdirat => &SYS_mkdirat, + mknod => &SYS_mknod, faccessat => &SYS_faccessat, setresuid => &SYS_setresuid, fchownat => &SYS_fchownat, diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm index c91e933..fbb6773 100644 --- a/src/PVE/Tools.pm +++ b/src/PVE/Tools.pm @@ -1720,6 +1720,11 @@ sub mkdirat($$$) { return syscall(PVE::Syscall::mkdirat, int($dirfd), $name, int($mode)) == 0; } +sub mknod($$$) { + my ($filename, $mode, $dev) = @_; + return syscall(PVE::Syscall::SYS_mknod, $filename, int($mode), int($dev)) == 0; +} + sub fchownat($$$$$) { my ($dirfd, $pathname, $owner, $group, $flags) = @_; return syscall( -- 2.39.2 ^ permalink raw reply [flat|nested] 6+ messages in thread
* [pve-devel] [PATCH v3 container 1/1] Add device passthrough 2023-11-07 13:46 [pve-devel] [PATCH v3 many] Add container device passthrough Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 common 1/1] tools: Add mknod syscall Filip Schauer @ 2023-11-07 13:46 ` Filip Schauer 2023-11-08 11:03 ` Dominik Csapak 2023-11-10 10:44 ` Wolfgang Bumiller 1 sibling, 2 replies; 6+ messages in thread From: Filip Schauer @ 2023-11-07 13:46 UTC (permalink / raw) To: pve-devel Add a dev[n] argument to the container config to pass devices through to a container. A device can be passed by its path. Additionally the access mode, uid and gid can be specified through their respective properties. Signed-off-by: Filip Schauer <f.schauer@proxmox.com> --- src/PVE/LXC.pm | 29 +++++++++++++- src/PVE/LXC/Config.pm | 79 +++++++++++++++++++++++++++++++++++++++ src/PVE/LXC/Tools.pm | 23 +++++++++--- src/lxc-pve-autodev-hook | 14 ++++++- src/lxc-pve-prestart-hook | 63 +++++++++++++++++++++++++++++++ 5 files changed, 201 insertions(+), 7 deletions(-) diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm index 7ec816b..162f093 100644 --- a/src/PVE/LXC.pm +++ b/src/PVE/LXC.pm @@ -5,7 +5,7 @@ use warnings; use Cwd qw(); use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST); -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY); +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode); use File::Path; use File::Spec; use IO::Poll qw(POLLIN POLLHUP); @@ -639,6 +639,33 @@ sub update_lxc_config { $raw .= "lxc.mount.auto = sys:mixed\n"; } + PVE::LXC::Config->foreach_passthrough_device($conf, sub { + my ($key, $device) = @_; + + die "Path is not defined for passthrough device $key" + unless (defined($device->{path})); + + my $absolute_path = $device->{path}; + + die "Device $absolute_path does not exist\n" + unless (-e $absolute_path); + + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; + + die "Could not get mode of device $absolute_path\n" + if $mode !~ /^([\d]+)$/; + $mode = $1; + + die "Could not get device ID of $absolute_path\n" + if $rdev !~ /^([\d]+)$/; + $rdev = $1; + + my $major = PVE::Tools::dev_t_major($rdev); + my $minor = PVE::Tools::dev_t_minor($rdev); + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; + $raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n"; + }); + # WARNING: DO NOT REMOVE this without making sure that loop device nodes # cannot be exposed to the container with r/w access (cgroup perms). # When this is enabled mounts will still remain in the monitor's namespace diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm index 56e1f10..b4fa147 100644 --- a/src/PVE/LXC/Config.pm +++ b/src/PVE/LXC/Config.pm @@ -29,6 +29,7 @@ mkdir $lockdir; mkdir "/etc/pve/nodes/$nodename/lxc"; my $MAX_MOUNT_POINTS = 256; my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS; +my $MAX_DEVICES = 256; # BEGIN implemented abstract methods from PVE::AbstractConfig @@ -908,6 +909,57 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { } } +PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string); +sub verify_lxc_dev_string { + my ($dev, $noerr) = @_; + + if ( + $dev =~ m@/\.\.?/@ || + $dev =~ m@/\.\.?$@ || + $dev !~ m!^/dev/! + ) { + return undef if $noerr; + die "$dev is not a valid device path\n"; + } + + return $dev; +} + +my $dev_desc = { + path => { + optional => 1, + type => 'string', + default_key => 1, + format => 'pve-lxc-dev-string', + format_description => 'Path', + description => 'Device to pass through to the container', + verbose_description => 'Path to the device to pass through to the container', + }, + mode => { + optional => 1, + type => 'integer', + description => 'Access mode (in octal) to be set on the device node', + }, + uid => { + optional => 1, + type => 'integer', + description => 'User ID to be assigned to the device node', + }, + gid => { + optional => 1, + type => 'integer', + description => 'Group ID to be assigned to the device node', + }, +}; + +for (my $i = 0; $i < $MAX_DEVICES; $i++) { + $confdesc->{"dev$i"} = { + optional => 1, + type => 'string', format => $dev_desc, + description => "Device to pass through to the container", + } +} + sub parse_pct_config { my ($filename, $raw, $strict) = @_; @@ -1255,6 +1307,21 @@ sub parse_volume { return; } +sub parse_device { + my ($class, $device_string, $noerr) = @_; + + my $res; + eval { $res = PVE::JSONSchema::parse_property_string($dev_desc, $device_string) }; + if ($@) { + return undef if $noerr; + die $@; + } + + die "Path has to be defined" unless (defined($res->{path})); + + return $res; +} + sub print_volume { my ($class, $key, $volume) = @_; @@ -1762,4 +1829,16 @@ sub get_derived_property { } } +sub foreach_passthrough_device { + my ($class, $conf, $func, @param) = @_; + + foreach my $key (keys %$conf) { + next if $key !~ m/^dev(\d+)$/; + + my $device = $class->parse_device($conf->{$key}); + + $func->($key, $device, @param); + } +} + 1; diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm index 62cdbc1..c9fb834 100644 --- a/src/PVE/LXC/Tools.pm +++ b/src/PVE/LXC/Tools.pm @@ -81,10 +81,9 @@ sub lxc_hook($$&) { $code->($ct_name, $common_vars, $namespaces, $args); } -sub for_current_devices($&) { - my ($vmid, $code) = @_; +sub for_devices { + my ($devlist_file, $vmid, $code) = @_; - my $devlist_file = "/var/lib/lxc/$vmid/devices"; my $fd; if (! open $fd, '<', $devlist_file) { @@ -93,8 +92,8 @@ sub for_current_devices($&) { } while (defined(my $line = <$fd>)) { - if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) { - warn "invalid .pve-devices entry: $line\n"; + if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) { + warn "invalid $devlist_file entry: $line\n"; next; } @@ -117,6 +116,20 @@ sub for_current_devices($&) { close $fd; } +sub for_current_devices($&) { + my ($vmid, $code) = @_; + + my $devlist_file = "/var/lib/lxc/$vmid/devices"; + for_devices($devlist_file, $vmid, $code); +} + +sub for_passthrough_devices($&) { + my ($vmid, $code) = @_; + + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; + for_devices($passthrough_devlist_file, $vmid, $code); +} + sub cgroup_do_write($$) { my ($path, $value) = @_; my $fd; diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook index 3c45949..cd2b92a 100755 --- a/src/lxc-pve-autodev-hook +++ b/src/lxc-pve-autodev-hook @@ -3,8 +3,9 @@ use strict; use warnings; -use File::Path; use File::Basename; +use File::Path; +use File::Touch; use PVE::LXC::Tools; use PVE::Tools; @@ -14,6 +15,17 @@ PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub { my $root = $vars->{ROOTFS_MOUNT}; + PVE::LXC::Tools::for_passthrough_devices($vmid, sub { + my ($type, $major, $minor, $dev) = @_; + + my $rel_devpath = "/dev/$dev"; + my $rel_dir = dirname($rel_devpath); + File::Path::mkpath("$root/$rel_dir"); + touch("$root/dev/$dev"); + + PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, 4096, 0); + }); + PVE::LXC::Tools::for_current_devices($vmid, sub { my ($type, $major, $minor, $dev) = @_; diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook index 936d0bf..68c7cec 100755 --- a/src/lxc-pve-prestart-hook +++ b/src/lxc-pve-prestart-hook @@ -6,6 +6,7 @@ use strict; use warnings; use Fcntl qw(O_DIRECTORY :mode); +use File::Basename; use File::Path; use POSIX; @@ -118,6 +119,55 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint); + # Device passthrough + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; + unlink $passthrough_devlist_file; + my $passthrough_devices = []; + + my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough"; + File::Path::make_path($passthrough_dir); + PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k"); + + my $setup_passthrough_device = sub { + my ($key, $device) = @_; + + my $absolute_path = $device->{path}; + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; + + die "Could not get mode of device $absolute_path\n" + if $mode !~ /^([\d]+)$/; + $mode = $1; + + die "Could not get device ID of $absolute_path\n" + if $rdev !~ /^([\d]+)$/; + $rdev = $1; + + my $passthrough_device_path = "$passthrough_dir$absolute_path"; + + die "passthrough device path is invalid\n" + if $passthrough_device_path !~ /^([\w\/]+)$/; + $passthrough_device_path = $1; + + File::Path::make_path(dirname($passthrough_device_path)); + PVE::Tools::mknod($passthrough_device_path, $mode, $rdev); + + # Use chmod because umask could mess with the access mode on mknod + my $passthrough_mode = 0660; + $passthrough_mode = oct($device->{mode}) if (defined($device->{mode})); + chmod $passthrough_mode, $passthrough_device_path; + + # Set uid and gid of the device node + my $uid = $rootuid; + my $gid = $rootgid; + $uid += $device->{uid} if (defined($device->{uid})); + $gid += $device->{gid} if (defined($device->{gid})); + chown $uid, $gid, $passthrough_device_path; + + push @$passthrough_devices, $absolute_path; + }; + + PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device); + my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir); $lxc_setup->pre_start_hook(); @@ -140,6 +190,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { } PVE::Tools::file_set_contents($devlist_file, $devlist); } + + if (@$passthrough_devices) { + my $devlist = ''; + foreach my $dev (@$passthrough_devices) { + my ($mode, $rdev) = (stat($dev))[2, 6]; + next if !$mode || !$rdev; + my $major = PVE::Tools::dev_t_major($rdev); + my $minor = PVE::Tools::dev_t_minor($rdev); + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; + $devlist .= "$device_type_char:$major:$minor:$dev\n"; + } + PVE::Tools::file_set_contents($passthrough_devlist_file, $devlist); + } }); # Leftover cgroups prevent lxc from starting without any useful information -- 2.39.2 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pve-devel] [PATCH v3 container 1/1] Add device passthrough 2023-11-07 13:46 ` [pve-devel] [PATCH v3 container 1/1] Add device passthrough Filip Schauer @ 2023-11-08 11:03 ` Dominik Csapak 2023-11-10 10:44 ` Wolfgang Bumiller 1 sibling, 0 replies; 6+ messages in thread From: Dominik Csapak @ 2023-11-08 11:03 UTC (permalink / raw) To: Proxmox VE development discussion, Filip Schauer did not properly review this, but what caught my attention was that you don't define any permissions for this new property? by default new options in pve-container only need 'VM.Config.Options' but IMHO this should be root only for now? (unless we can use mappings where we can use those permissions) ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pve-devel] [PATCH v3 container 1/1] Add device passthrough 2023-11-07 13:46 ` [pve-devel] [PATCH v3 container 1/1] Add device passthrough Filip Schauer 2023-11-08 11:03 ` Dominik Csapak @ 2023-11-10 10:44 ` Wolfgang Bumiller 2023-11-13 10:33 ` Filip Schauer 1 sibling, 1 reply; 6+ messages in thread From: Wolfgang Bumiller @ 2023-11-10 10:44 UTC (permalink / raw) To: Filip Schauer; +Cc: pve-devel On Tue, Nov 07, 2023 at 02:46:42PM +0100, Filip Schauer wrote: > Add a dev[n] argument to the container config to pass devices through to > a container. A device can be passed by its path. Additionally the access > mode, uid and gid can be specified through their respective properties. > > Signed-off-by: Filip Schauer <f.schauer@proxmox.com> > --- > src/PVE/LXC.pm | 29 +++++++++++++- > src/PVE/LXC/Config.pm | 79 +++++++++++++++++++++++++++++++++++++++ > src/PVE/LXC/Tools.pm | 23 +++++++++--- > src/lxc-pve-autodev-hook | 14 ++++++- > src/lxc-pve-prestart-hook | 63 +++++++++++++++++++++++++++++++ > 5 files changed, 201 insertions(+), 7 deletions(-) > > diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm > index 7ec816b..162f093 100644 > --- a/src/PVE/LXC.pm > +++ b/src/PVE/LXC.pm > @@ -5,7 +5,7 @@ use warnings; > > use Cwd qw(); > use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST); > -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY); > +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode); > use File::Path; > use File::Spec; > use IO::Poll qw(POLLIN POLLHUP); > @@ -639,6 +639,33 @@ sub update_lxc_config { > $raw .= "lxc.mount.auto = sys:mixed\n"; > } > > + PVE::LXC::Config->foreach_passthrough_device($conf, sub { > + my ($key, $device) = @_; > + > + die "Path is not defined for passthrough device $key" > + unless (defined($device->{path})); > + > + my $absolute_path = $device->{path}; > + > + die "Device $absolute_path does not exist\n" > + unless (-e $absolute_path); > + > + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; > + > + die "Could not get mode of device $absolute_path\n" > + if $mode !~ /^([\d]+)$/; > + $mode = $1; > + > + die "Could not get device ID of $absolute_path\n" > + if $rdev !~ /^([\d]+)$/; > + $rdev = $1; ^ The above 2 checks don't seem necessary. `stat()` cannot return different types of values there. Just check them for definedness once together. > + > + my $major = PVE::Tools::dev_t_major($rdev); > + my $minor = PVE::Tools::dev_t_minor($rdev); > + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; > + $raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n"; > + }); > + > # WARNING: DO NOT REMOVE this without making sure that loop device nodes > # cannot be exposed to the container with r/w access (cgroup perms). > # When this is enabled mounts will still remain in the monitor's namespace > diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm > index 56e1f10..b4fa147 100644 > --- a/src/PVE/LXC/Config.pm > +++ b/src/PVE/LXC/Config.pm > @@ -29,6 +29,7 @@ mkdir $lockdir; > mkdir "/etc/pve/nodes/$nodename/lxc"; > my $MAX_MOUNT_POINTS = 256; > my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS; > +my $MAX_DEVICES = 256; > > # BEGIN implemented abstract methods from PVE::AbstractConfig > > @@ -908,6 +909,57 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { > } > } > > +PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string); > +sub verify_lxc_dev_string { > + my ($dev, $noerr) = @_; > + > + if ( > + $dev =~ m@/\.\.?/@ || > + $dev =~ m@/\.\.?$@ || > + $dev !~ m!^/dev/! > + ) { > + return undef if $noerr; > + die "$dev is not a valid device path\n"; > + } > + > + return $dev; > +} > + > +my $dev_desc = { > + path => { > + optional => 1, > + type => 'string', > + default_key => 1, > + format => 'pve-lxc-dev-string', > + format_description => 'Path', > + description => 'Device to pass through to the container', > + verbose_description => 'Path to the device to pass through to the container', > + }, > + mode => { > + optional => 1, > + type => 'integer', > + description => 'Access mode (in octal) to be set on the device node', > + }, > + uid => { > + optional => 1, > + type => 'integer', > + description => 'User ID to be assigned to the device node', > + }, > + gid => { > + optional => 1, > + type => 'integer', > + description => 'Group ID to be assigned to the device node', > + }, > +}; > + > +for (my $i = 0; $i < $MAX_DEVICES; $i++) { > + $confdesc->{"dev$i"} = { > + optional => 1, > + type => 'string', format => $dev_desc, > + description => "Device to pass through to the container", > + } > +} > + > sub parse_pct_config { > my ($filename, $raw, $strict) = @_; > > @@ -1255,6 +1307,21 @@ sub parse_volume { > return; > } > > +sub parse_device { > + my ($class, $device_string, $noerr) = @_; > + > + my $res; > + eval { $res = PVE::JSONSchema::parse_property_string($dev_desc, $device_string) }; > + if ($@) { > + return undef if $noerr; > + die $@; > + } > + > + die "Path has to be defined" unless (defined($res->{path})); > + > + return $res; > +} > + > sub print_volume { > my ($class, $key, $volume) = @_; > > @@ -1762,4 +1829,16 @@ sub get_derived_property { > } > } > > +sub foreach_passthrough_device { > + my ($class, $conf, $func, @param) = @_; > + > + foreach my $key (keys %$conf) { > + next if $key !~ m/^dev(\d+)$/; > + > + my $device = $class->parse_device($conf->{$key}); > + > + $func->($key, $device, @param); > + } > +} > + > 1; > diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm > index 62cdbc1..c9fb834 100644 > --- a/src/PVE/LXC/Tools.pm > +++ b/src/PVE/LXC/Tools.pm > @@ -81,10 +81,9 @@ sub lxc_hook($$&) { > $code->($ct_name, $common_vars, $namespaces, $args); > } > > -sub for_current_devices($&) { > - my ($vmid, $code) = @_; > +sub for_devices { > + my ($devlist_file, $vmid, $code) = @_; > > - my $devlist_file = "/var/lib/lxc/$vmid/devices"; > my $fd; > > if (! open $fd, '<', $devlist_file) { > @@ -93,8 +92,8 @@ sub for_current_devices($&) { > } > > while (defined(my $line = <$fd>)) { > - if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) { > - warn "invalid .pve-devices entry: $line\n"; > + if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) { > + warn "invalid $devlist_file entry: $line\n"; > next; > } > > @@ -117,6 +116,20 @@ sub for_current_devices($&) { > close $fd; > } > > +sub for_current_devices($&) { > + my ($vmid, $code) = @_; > + > + my $devlist_file = "/var/lib/lxc/$vmid/devices"; > + for_devices($devlist_file, $vmid, $code); > +} > + > +sub for_passthrough_devices($&) { > + my ($vmid, $code) = @_; > + > + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; > + for_devices($passthrough_devlist_file, $vmid, $code); > +} > + > sub cgroup_do_write($$) { > my ($path, $value) = @_; > my $fd; > diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook > index 3c45949..cd2b92a 100755 > --- a/src/lxc-pve-autodev-hook > +++ b/src/lxc-pve-autodev-hook > @@ -3,8 +3,9 @@ > use strict; > use warnings; > > -use File::Path; > use File::Basename; > +use File::Path; > +use File::Touch; > > use PVE::LXC::Tools; > use PVE::Tools; > @@ -14,6 +15,17 @@ PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub { > > my $root = $vars->{ROOTFS_MOUNT}; > > + PVE::LXC::Tools::for_passthrough_devices($vmid, sub { > + my ($type, $major, $minor, $dev) = @_; > + > + my $rel_devpath = "/dev/$dev"; > + my $rel_dir = dirname($rel_devpath); > + File::Path::mkpath("$root/$rel_dir"); > + touch("$root/dev/$dev"); ^ Hint: `touch` is not a syscall. This does an `utimensat()` to update the file mtimes. This isn't really necessary, and an easy way to just create a file and do nothing if it already exists is to just use mknod with S_IFREG for the file type in the mode bits and zero for the device type: PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0); > + > + PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, 4096, 0); Don't use magic numbers. Please place the MS_BIND constant in PVE::Tools instead. Also die() if mount fails. > + }); > + > PVE::LXC::Tools::for_current_devices($vmid, sub { > my ($type, $major, $minor, $dev) = @_; > > diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook > index 936d0bf..68c7cec 100755 > --- a/src/lxc-pve-prestart-hook > +++ b/src/lxc-pve-prestart-hook > @@ -6,6 +6,7 @@ use strict; > use warnings; > > use Fcntl qw(O_DIRECTORY :mode); > +use File::Basename; > use File::Path; > use POSIX; > > @@ -118,6 +119,55 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { > > PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint); > > + # Device passthrough > + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; > + unlink $passthrough_devlist_file; > + my $passthrough_devices = []; > + > + my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough"; > + File::Path::make_path($passthrough_dir); > + PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k"); ^ checking that `mount` succeeded is particularly important here > + > + my $setup_passthrough_device = sub { > + my ($key, $device) = @_; > + > + my $absolute_path = $device->{path}; > + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; > + > + die "Could not get mode of device $absolute_path\n" > + if $mode !~ /^([\d]+)$/; > + $mode = $1; > + > + die "Could not get device ID of $absolute_path\n" > + if $rdev !~ /^([\d]+)$/; > + $rdev = $1; ^ As above for the 2 checks with the stat() returned values. > + > + my $passthrough_device_path = "$passthrough_dir$absolute_path"; ^ The above concatenation would probably be more readable with `.` instead of quotes ($passthrough_dir . $absolute_path), or with `{}` around the names. > + > + die "passthrough device path is invalid\n" > + if $passthrough_device_path !~ /^([\w\/]+)$/; > + $passthrough_device_path = $1; ^ Why this check? Both portions should be fine - $passthrough_dir is trivial and $absolute_path comes from the parse function after verification (and is already used in `stat()` so definitely contains a valid path we also already used. > + > + File::Path::make_path(dirname($passthrough_device_path)); > + PVE::Tools::mknod($passthrough_device_path, $mode, $rdev); ^ `mknod` should be checked for errors. > + > + # Use chmod because umask could mess with the access mode on mknod > + my $passthrough_mode = 0660; > + $passthrough_mode = oct($device->{mode}) if (defined($device->{mode})); > + chmod $passthrough_mode, $passthrough_device_path; ^ should be checked for errors. > + > + # Set uid and gid of the device node > + my $uid = $rootuid; > + my $gid = $rootgid; > + $uid += $device->{uid} if (defined($device->{uid})); > + $gid += $device->{gid} if (defined($device->{gid})); ^ id mappings can constist of multiple ranges. You get the mapping from `parse_id_maps` (the 1st return value in the line fetching `$root*id`). It's an array of tuples of the form: [ "g" or "u", <ns id>, <host id>, <length> ] Make a helper to map ids using that array (see [1]). $device->{*id} is from the perspective of the container and you want to map from that to the host. [1] https://git.proxmox.com/?p=pve-lxc-syscalld.git;a=blob;f=src/process/id_map.rs;h=d61d489da7af7418908fd8d85f1660c435b7c73e;hb=refs/heads/master#l26 > + chown $uid, $gid, $passthrough_device_path; ^ check for errors > + > + push @$passthrough_devices, $absolute_path; push @$passthrough_devices, [$absolute_path, $mode, $rdev]; to avoid the extra stat below... > + }; > + > + PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device); > + > my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir); > $lxc_setup->pre_start_hook(); > > @@ -140,6 +190,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { > } > PVE::Tools::file_set_contents($devlist_file, $devlist); > } > + > + if (@$passthrough_devices) { > + my $devlist = ''; > + foreach my $dev (@$passthrough_devices) { > + my ($mode, $rdev) = (stat($dev))[2, 6]; > + next if !$mode || !$rdev; ...and then resolve the tuple via: ($dev, my $mode, my $rdev) = @$dev; > + my $major = PVE::Tools::dev_t_major($rdev); > + my $minor = PVE::Tools::dev_t_minor($rdev); > + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; > + $devlist .= "$device_type_char:$major:$minor:$dev\n"; > + } > + PVE::Tools::file_set_contents($passthrough_devlist_file, $devlist); > + } > }); > > # Leftover cgroups prevent lxc from starting without any useful information > -- > 2.39.2 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [pve-devel] [PATCH v3 container 1/1] Add device passthrough 2023-11-10 10:44 ` Wolfgang Bumiller @ 2023-11-13 10:33 ` Filip Schauer 0 siblings, 0 replies; 6+ messages in thread From: Filip Schauer @ 2023-11-13 10:33 UTC (permalink / raw) To: Wolfgang Bumiller; +Cc: pve-devel Patch v4 available: https://lists.proxmox.com/pipermail/pve-devel/2023-November/060022.html On 10/11/2023 11:44, Wolfgang Bumiller wrote: > On Tue, Nov 07, 2023 at 02:46:42PM +0100, Filip Schauer wrote: >> Add a dev[n] argument to the container config to pass devices through to >> a container. A device can be passed by its path. Additionally the access >> mode, uid and gid can be specified through their respective properties. >> >> Signed-off-by: Filip Schauer <f.schauer@proxmox.com> >> --- >> src/PVE/LXC.pm | 29 +++++++++++++- >> src/PVE/LXC/Config.pm | 79 +++++++++++++++++++++++++++++++++++++++ >> src/PVE/LXC/Tools.pm | 23 +++++++++--- >> src/lxc-pve-autodev-hook | 14 ++++++- >> src/lxc-pve-prestart-hook | 63 +++++++++++++++++++++++++++++++ >> 5 files changed, 201 insertions(+), 7 deletions(-) >> >> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm >> index 7ec816b..162f093 100644 >> --- a/src/PVE/LXC.pm >> +++ b/src/PVE/LXC.pm >> @@ -5,7 +5,7 @@ use warnings; >> >> use Cwd qw(); >> use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST); >> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY); >> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode); >> use File::Path; >> use File::Spec; >> use IO::Poll qw(POLLIN POLLHUP); >> @@ -639,6 +639,33 @@ sub update_lxc_config { >> $raw .= "lxc.mount.auto = sys:mixed\n"; >> } >> >> + PVE::LXC::Config->foreach_passthrough_device($conf, sub { >> + my ($key, $device) = @_; >> + >> + die "Path is not defined for passthrough device $key" >> + unless (defined($device->{path})); >> + >> + my $absolute_path = $device->{path}; >> + >> + die "Device $absolute_path does not exist\n" >> + unless (-e $absolute_path); >> + >> + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; >> + >> + die "Could not get mode of device $absolute_path\n" >> + if $mode !~ /^([\d]+)$/; >> + $mode = $1; >> + >> + die "Could not get device ID of $absolute_path\n" >> + if $rdev !~ /^([\d]+)$/; >> + $rdev = $1; > ^ The above 2 checks don't seem necessary. `stat()` cannot return > different types of values there. > Just check them for definedness once together. > >> + >> + my $major = PVE::Tools::dev_t_major($rdev); >> + my $minor = PVE::Tools::dev_t_minor($rdev); >> + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; >> + $raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n"; >> + }); >> + >> # WARNING: DO NOT REMOVE this without making sure that loop device nodes >> # cannot be exposed to the container with r/w access (cgroup perms). >> # When this is enabled mounts will still remain in the monitor's namespace >> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm >> index 56e1f10..b4fa147 100644 >> --- a/src/PVE/LXC/Config.pm >> +++ b/src/PVE/LXC/Config.pm >> @@ -29,6 +29,7 @@ mkdir $lockdir; >> mkdir "/etc/pve/nodes/$nodename/lxc"; >> my $MAX_MOUNT_POINTS = 256; >> my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS; >> +my $MAX_DEVICES = 256; >> >> # BEGIN implemented abstract methods from PVE::AbstractConfig >> >> @@ -908,6 +909,57 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) { >> } >> } >> >> +PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string); >> +sub verify_lxc_dev_string { >> + my ($dev, $noerr) = @_; >> + >> + if ( >> + $dev =~ m@/\.\.?/@ || >> + $dev =~ m@/\.\.?$@ || >> + $dev !~ m!^/dev/! >> + ) { >> + return undef if $noerr; >> + die "$dev is not a valid device path\n"; >> + } >> + >> + return $dev; >> +} >> + >> +my $dev_desc = { >> + path => { >> + optional => 1, >> + type => 'string', >> + default_key => 1, >> + format => 'pve-lxc-dev-string', >> + format_description => 'Path', >> + description => 'Device to pass through to the container', >> + verbose_description => 'Path to the device to pass through to the container', >> + }, >> + mode => { >> + optional => 1, >> + type => 'integer', >> + description => 'Access mode (in octal) to be set on the device node', >> + }, >> + uid => { >> + optional => 1, >> + type => 'integer', >> + description => 'User ID to be assigned to the device node', >> + }, >> + gid => { >> + optional => 1, >> + type => 'integer', >> + description => 'Group ID to be assigned to the device node', >> + }, >> +}; >> + >> +for (my $i = 0; $i < $MAX_DEVICES; $i++) { >> + $confdesc->{"dev$i"} = { >> + optional => 1, >> + type => 'string', format => $dev_desc, >> + description => "Device to pass through to the container", >> + } >> +} >> + >> sub parse_pct_config { >> my ($filename, $raw, $strict) = @_; >> >> @@ -1255,6 +1307,21 @@ sub parse_volume { >> return; >> } >> >> +sub parse_device { >> + my ($class, $device_string, $noerr) = @_; >> + >> + my $res; >> + eval { $res = PVE::JSONSchema::parse_property_string($dev_desc, $device_string) }; >> + if ($@) { >> + return undef if $noerr; >> + die $@; >> + } >> + >> + die "Path has to be defined" unless (defined($res->{path})); >> + >> + return $res; >> +} >> + >> sub print_volume { >> my ($class, $key, $volume) = @_; >> >> @@ -1762,4 +1829,16 @@ sub get_derived_property { >> } >> } >> >> +sub foreach_passthrough_device { >> + my ($class, $conf, $func, @param) = @_; >> + >> + foreach my $key (keys %$conf) { >> + next if $key !~ m/^dev(\d+)$/; >> + >> + my $device = $class->parse_device($conf->{$key}); >> + >> + $func->($key, $device, @param); >> + } >> +} >> + >> 1; >> diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm >> index 62cdbc1..c9fb834 100644 >> --- a/src/PVE/LXC/Tools.pm >> +++ b/src/PVE/LXC/Tools.pm >> @@ -81,10 +81,9 @@ sub lxc_hook($$&) { >> $code->($ct_name, $common_vars, $namespaces, $args); >> } >> >> -sub for_current_devices($&) { >> - my ($vmid, $code) = @_; >> +sub for_devices { >> + my ($devlist_file, $vmid, $code) = @_; >> >> - my $devlist_file = "/var/lib/lxc/$vmid/devices"; >> my $fd; >> >> if (! open $fd, '<', $devlist_file) { >> @@ -93,8 +92,8 @@ sub for_current_devices($&) { >> } >> >> while (defined(my $line = <$fd>)) { >> - if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) { >> - warn "invalid .pve-devices entry: $line\n"; >> + if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) { >> + warn "invalid $devlist_file entry: $line\n"; >> next; >> } >> >> @@ -117,6 +116,20 @@ sub for_current_devices($&) { >> close $fd; >> } >> >> +sub for_current_devices($&) { >> + my ($vmid, $code) = @_; >> + >> + my $devlist_file = "/var/lib/lxc/$vmid/devices"; >> + for_devices($devlist_file, $vmid, $code); >> +} >> + >> +sub for_passthrough_devices($&) { >> + my ($vmid, $code) = @_; >> + >> + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; >> + for_devices($passthrough_devlist_file, $vmid, $code); >> +} >> + >> sub cgroup_do_write($$) { >> my ($path, $value) = @_; >> my $fd; >> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook >> index 3c45949..cd2b92a 100755 >> --- a/src/lxc-pve-autodev-hook >> +++ b/src/lxc-pve-autodev-hook >> @@ -3,8 +3,9 @@ >> use strict; >> use warnings; >> >> -use File::Path; >> use File::Basename; >> +use File::Path; >> +use File::Touch; >> >> use PVE::LXC::Tools; >> use PVE::Tools; >> @@ -14,6 +15,17 @@ PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub { >> >> my $root = $vars->{ROOTFS_MOUNT}; >> >> + PVE::LXC::Tools::for_passthrough_devices($vmid, sub { >> + my ($type, $major, $minor, $dev) = @_; >> + >> + my $rel_devpath = "/dev/$dev"; >> + my $rel_dir = dirname($rel_devpath); >> + File::Path::mkpath("$root/$rel_dir"); >> + touch("$root/dev/$dev"); > ^ Hint: `touch` is not a syscall. This does an `utimensat()` to update > the file mtimes. This isn't really necessary, and an easy way to just > create a file and do nothing if it already exists is to just use mknod > with S_IFREG for the file type in the mode bits and zero for the device > type: > PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0); > > >> + >> + PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, 4096, 0); > Don't use magic numbers. Please place the MS_BIND constant in PVE::Tools > instead. > > Also die() if mount fails. > >> + }); >> + >> PVE::LXC::Tools::for_current_devices($vmid, sub { >> my ($type, $major, $minor, $dev) = @_; >> >> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook >> index 936d0bf..68c7cec 100755 >> --- a/src/lxc-pve-prestart-hook >> +++ b/src/lxc-pve-prestart-hook >> @@ -6,6 +6,7 @@ use strict; >> use warnings; >> >> use Fcntl qw(O_DIRECTORY :mode); >> +use File::Basename; >> use File::Path; >> use POSIX; >> >> @@ -118,6 +119,55 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { >> >> PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint); >> >> + # Device passthrough >> + my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices"; >> + unlink $passthrough_devlist_file; >> + my $passthrough_devices = []; >> + >> + my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough"; >> + File::Path::make_path($passthrough_dir); >> + PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k"); > ^ checking that `mount` succeeded is particularly important here > >> + >> + my $setup_passthrough_device = sub { >> + my ($key, $device) = @_; >> + >> + my $absolute_path = $device->{path}; >> + my ($mode, $rdev) = (stat($absolute_path))[2, 6]; >> + >> + die "Could not get mode of device $absolute_path\n" >> + if $mode !~ /^([\d]+)$/; >> + $mode = $1; >> + >> + die "Could not get device ID of $absolute_path\n" >> + if $rdev !~ /^([\d]+)$/; >> + $rdev = $1; > ^ As above for the 2 checks with the stat() returned values. > >> + >> + my $passthrough_device_path = "$passthrough_dir$absolute_path"; > ^ The above concatenation would probably be more readable with `.` > instead of quotes ($passthrough_dir . $absolute_path), or with `{}` > around the names. > >> + >> + die "passthrough device path is invalid\n" >> + if $passthrough_device_path !~ /^([\w\/]+)$/; >> + $passthrough_device_path = $1; > ^ Why this check? Both portions should be fine - $passthrough_dir is > trivial and $absolute_path comes from the parse function after > verification (and is already used in `stat()` so definitely contains a > valid path we also already used. > >> + >> + File::Path::make_path(dirname($passthrough_device_path)); >> + PVE::Tools::mknod($passthrough_device_path, $mode, $rdev); > ^ `mknod` should be checked for errors. > >> + >> + # Use chmod because umask could mess with the access mode on mknod >> + my $passthrough_mode = 0660; >> + $passthrough_mode = oct($device->{mode}) if (defined($device->{mode})); >> + chmod $passthrough_mode, $passthrough_device_path; > ^ should be checked for errors. > >> + >> + # Set uid and gid of the device node >> + my $uid = $rootuid; >> + my $gid = $rootgid; >> + $uid += $device->{uid} if (defined($device->{uid})); >> + $gid += $device->{gid} if (defined($device->{gid})); > ^ id mappings can constist of multiple ranges. > You get the mapping from `parse_id_maps` (the 1st return value in the > line fetching `$root*id`). > It's an array of tuples of the form: > [ "g" or "u", <ns id>, <host id>, <length> ] > Make a helper to map ids using that array (see [1]). > $device->{*id} is from the perspective of the container and you want to > map from that to the host. > > [1] https://git.proxmox.com/?p=pve-lxc-syscalld.git;a=blob;f=src/process/id_map.rs;h=d61d489da7af7418908fd8d85f1660c435b7c73e;hb=refs/heads/master#l26 > >> + chown $uid, $gid, $passthrough_device_path; > ^ check for errors > >> + >> + push @$passthrough_devices, $absolute_path; > push @$passthrough_devices, [$absolute_path, $mode, $rdev]; > to avoid the extra stat below... > >> + }; >> + >> + PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device); >> + >> my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir); >> $lxc_setup->pre_start_hook(); >> >> @@ -140,6 +190,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub { >> } >> PVE::Tools::file_set_contents($devlist_file, $devlist); >> } >> + >> + if (@$passthrough_devices) { >> + my $devlist = ''; >> + foreach my $dev (@$passthrough_devices) { >> + my ($mode, $rdev) = (stat($dev))[2, 6]; >> + next if !$mode || !$rdev; > ...and then resolve the tuple via: > ($dev, my $mode, my $rdev) = @$dev; > >> + my $major = PVE::Tools::dev_t_major($rdev); >> + my $minor = PVE::Tools::dev_t_minor($rdev); >> + my $device_type_char = S_ISBLK($mode) ? 'b' : 'c'; >> + $devlist .= "$device_type_char:$major:$minor:$dev\n"; >> + } >> + PVE::Tools::file_set_contents($passthrough_devlist_file, $devlist); >> + } >> }); >> >> # Leftover cgroups prevent lxc from starting without any useful information >> -- >> 2.39.2 ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-11-13 10:33 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-11-07 13:46 [pve-devel] [PATCH v3 many] Add container device passthrough Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 common 1/1] tools: Add mknod syscall Filip Schauer 2023-11-07 13:46 ` [pve-devel] [PATCH v3 container 1/1] Add device passthrough Filip Schauer 2023-11-08 11:03 ` Dominik Csapak 2023-11-10 10:44 ` Wolfgang Bumiller 2023-11-13 10:33 ` Filip Schauer
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox