From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9]) by lore.proxmox.com (Postfix) with ESMTPS id 086891FF183 for ; Wed, 30 Jul 2025 12:58:42 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id 4446DAEB4; Wed, 30 Jul 2025 13:00:00 +0200 (CEST) Message-ID: Date: Wed, 30 Jul 2025 12:59:52 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Beta To: Proxmox VE development discussion , Filip Schauer References: <20250728124800.96685-1-f.schauer@proxmox.com> <20250728124800.96685-5-f.schauer@proxmox.com> Content-Language: en-US From: Thomas Lamprecht In-Reply-To: <20250728124800.96685-5-f.schauer@proxmox.com> X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1753873182202 X-SPAM-LEVEL: Spam detection results: 0 AWL -0.031 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Subject: Re: [pve-devel] [PATCH container v4 4/4] implement device hotplug X-BeenThere: pve-devel@lists.proxmox.com X-Mailman-Version: 2.1.29 Precedence: list List-Id: Proxmox VE development discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: Proxmox VE development discussion Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: pve-devel-bounces@lists.proxmox.com Sender: "pve-devel" Am 28.07.25 um 14:49 schrieb Filip Schauer: > This only includes adding devices to a running container. Removing or > editing existing devices is still not implemented. 
> > Signed-off-by: Filip Schauer > --- > src/PVE/LXC.pm | 84 ++++++++++++++++++++++++++++++++++++++++++- > src/PVE/LXC/Config.pm | 19 ++++++++++ > 2 files changed, 102 insertions(+), 1 deletion(-) > > diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm > index e5c0714..63fb5d1 100644 > --- a/src/PVE/LXC.pm > +++ b/src/PVE/LXC.pm > @@ -5,7 +5,7 @@ use warnings; > > use Cwd qw(); > use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST); > -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode); > +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY O_CREAT :mode); > use File::Basename; > use File::Path; > use File::Spec; > @@ -2178,6 +2178,88 @@ my $enter_mnt_ns_and_change_aa_profile = sub { > or die "failed to change apparmor profile (close() failed): $!\n"; > }; > > +sub device_passthrough_hotplug : prototype($$$) { > + my ($vmid, $conf, $dev) = @_; > + > + my ($mode, $rdev) = PVE::LXC::Tools::get_device_mode_and_rdev($dev->{path}); > + my $device_type = S_ISBLK($mode) ? 'b' : 'c'; > + my $major = PVE::Tools::dev_t_major($rdev); > + my $minor = PVE::Tools::dev_t_minor($rdev); > + > + # We do the rest in a fork with an unshared mount namespace: > + # -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start' > + # with move_mount privileges on every mount. > + # -) create the device node, then grab it, create a file to bind mount the device node onto in > + # the container, switch to the container mount namespace, and move_mount the device node. 
> + > + PVE::Tools::run_fork(sub { > + # Pin the container pid longer, we also need to get its monitor/parent: > + my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid) > + or die "failed to open pidfd of container $vmid\'s init process\n"; > + > + my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid) > + or die "failed to open pidfd of container $vmid\'s monitor process\n"; > + > + my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt'); > + my $ct_user_ns = $get_container_namespace->($vmid, $ct_pid, 'user'); > + my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt'); > + > + # Enter monitor mount namespace and switch to 'pve-container-mounthotplug' apparmor profile. > + $enter_mnt_ns_and_change_aa_profile->( > + $monitor_mnt_ns, "pve-container-mounthotplug", undef, > + ); > + > + my $id_map = (PVE::LXC::parse_id_maps($conf))[0]; > + my $passthrough_device_path = create_passthrough_device_node( > + "/var/lib/lxc/$vmid/passthrough", > + $dev, $mode, $rdev, $id_map, > + ); > + > + my $srcfh = PVE::Tools::open_tree( > + &AT_FDCWD, > + $passthrough_device_path, > + &OPEN_TREE_CLOEXEC | &OPEN_TREE_CLONE, > + ) or die "open_tree() on passthrough device node failed: $!\n"; > + > + if ($conf->{unprivileged}) { > + PVE::Tools::setns(fileno($ct_user_ns), PVE::Tools::CLONE_NEWUSER) > + or die "failed to enter user namespace of container $vmid: $!\n"; > + > + POSIX::setuid(0); > + POSIX::setgid(0); > + } > + > + # Create a regular file in the container to bind mount the device node onto. 
> + my $device_path = "/proc/$ct_pid/root$dev->{path}"; > + File::Path::make_path(dirname($device_path)); > + sysopen(my $dstfh, $device_path, O_CREAT) > + or die "failed to create '$device_path': $!\n"; > + > + # Enter the container mount namespace > + PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS); > + chdir('/') > + or die "failed to change directory within the container's mount namespace: $!\n"; > + > + # Bind mount the device node into the container > + PVE::Tools::move_mount( > + fileno($srcfh), > + '', > + fileno($dstfh), > + '', > + &MOVE_MOUNT_F_EMPTY_PATH | &MOVE_MOUNT_T_EMPTY_PATH, > + ) or die "move_mount failed: $!\n"; > + }); > + > + # Allow or deny device access with cgroup2 > + run_command(["lxc-cgroup", "-n", $vmid, "devices.deny", "$device_type $major:$minor w"]) > + if ($dev->{'deny-write'}); > + > + my $allow_perms = $dev->{'deny-write'} ? 'r' : 'rw'; > + run_command([ > + "lxc-cgroup", "-n", $vmid, "devices.allow", "$device_type $major:$minor $allow_perms", > + ]); > +} > + > sub mountpoint_hotplug : prototype($$$$$) { > my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_; > > diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm > index 7aa6263..de963bc 100644 > --- a/src/PVE/LXC/Config.pm > +++ b/src/PVE/LXC/Config.pm > @@ -1613,6 +1613,13 @@ sub vmconfig_hotplug_pending { > $class->apply_pending_mountpoint($vmid, $conf, $opt, $storecfg, 1); > # apply_pending_mountpoint modifies the value if it creates a new disk > $value = $conf->{pending}->{$opt}; > + } elsif ($opt =~ m/^dev(\d+)$/) { > + if (exists($conf->{$opt})) { > + die "skip\n"; # don't try to hotplug over existing dev > + } > + > + $class->apply_pending_device_passthrough($vmid, $conf, $opt, 1); Below would do well with a comment, and actually I'm not really sure it's needed, currently the print_device called in apply_pending_device_passthrough is just printing the property string per the format, which we just parsed before, so this should be 1:1 the same before 
and after here? Unlike for mpX mountpoints, where the formatted string might change. Or is this just for future preparation for more complex handling/devices? In any case, this can be fine as is for now, it doesn't really hurt either, it's just a little bit confusing and might be unnecessary, so maybe take another look at this or provide some rationale. > + $value = $conf->{pending}->{$opt}; > } else { > die "skip\n"; # skip non-hotpluggable > } > @@ -1732,6 +1739,18 @@ my $rescan_volume = sub { > warn "Could not rescan volume size - $@\n" if $@; > }; > > +sub apply_pending_device_passthrough { > + my ($class, $vmid, $conf, $opt, $running) = @_; > + > + my $dev = $class->parse_device($conf->{pending}->{$opt}); parsed here > + my $old = $conf->{$opt}; > + if ($running) { > + die "skip\n" if defined($old); # TODO: editing a device passthrough > + PVE::LXC::device_passthrough_hotplug($vmid, $conf, $dev); > + $conf->{pending}->{$opt} = $class->print_device($dev); serialized 1:1 again here > + } > +} > + > sub apply_pending_mountpoint { > my ($class, $vmid, $conf, $opt, $storecfg, $running) = @_; > _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel