From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <pve-devel-bounces@lists.proxmox.com>
Received: from firstgate.proxmox.com (firstgate.proxmox.com [IPv6:2a01:7e0:0:424::9])
	by lore.proxmox.com (Postfix) with ESMTPS id 464571FF172
	for <inbox@lore.proxmox.com>; Wed, 23 Apr 2025 14:57:41 +0200 (CEST)
Received: from firstgate.proxmox.com (localhost [127.0.0.1])
	by firstgate.proxmox.com (Proxmox) with ESMTP id 325F61A33F;
	Wed, 23 Apr 2025 14:57:17 +0200 (CEST)
From: Filip Schauer <f.schauer@proxmox.com>
To: pve-devel@lists.proxmox.com
Date: Wed, 23 Apr 2025 14:56:40 +0200
Message-Id: <20250423125640.88756-5-f.schauer@proxmox.com>
X-Mailer: git-send-email 2.39.5
In-Reply-To: <20250423125640.88756-1-f.schauer@proxmox.com>
References: <20250423125640.88756-1-f.schauer@proxmox.com>
MIME-Version: 1.0
X-SPAM-LEVEL: Spam detection results:  0
 AWL -0.018 Adjusted score from AWL reputation of From: address
 BAYES_00                 -1.9 Bayes spam probability is 0 to 1%
 DMARC_MISSING             0.1 Missing DMARC policy
 KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment
 RCVD_IN_VALIDITY_CERTIFIED_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_RPBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 RCVD_IN_VALIDITY_SAFE_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to
 Validity was blocked. See
 https://knowledge.validity.com/hc/en-us/articles/20961730681243 for more
 information.
 SPF_HELO_NONE           0.001 SPF: HELO does not publish an SPF Record
 SPF_PASS               -0.001 SPF: sender matches SPF record
 URIBL_BLOCKED 0.001 ADMINISTRATOR NOTICE: The query to URIBL was blocked. See
 http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block for more
 information. [lxc.pm, config.pm]
Subject: [pve-devel] [PATCH container v2 4/4] implement device hotplug
X-BeenThere: pve-devel@lists.proxmox.com
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Proxmox VE development discussion <pve-devel.lists.proxmox.com>
List-Unsubscribe: <https://lists.proxmox.com/cgi-bin/mailman/options/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=unsubscribe>
List-Archive: <http://lists.proxmox.com/pipermail/pve-devel/>
List-Post: <mailto:pve-devel@lists.proxmox.com>
List-Help: <mailto:pve-devel-request@lists.proxmox.com?subject=help>
List-Subscribe: <https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel>, 
 <mailto:pve-devel-request@lists.proxmox.com?subject=subscribe>
Reply-To: Proxmox VE development discussion <pve-devel@lists.proxmox.com>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: pve-devel-bounces@lists.proxmox.com
Sender: "pve-devel" <pve-devel-bounces@lists.proxmox.com>

This only covers adding devices to a running container. Removing or
editing existing devices is not yet implemented.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 src/PVE/LXC.pm        | 74 ++++++++++++++++++++++++++++++++++++++++++-
 src/PVE/LXC/Config.pm | 19 +++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index d985b88..0c8c2e9 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -5,7 +5,7 @@ use warnings;
 
 use Cwd qw();
 use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY O_CREAT :mode);
 use File::Basename;
 use File::Path;
 use File::Spec;
@@ -2065,6 +2065,78 @@ my $enter_mnt_ns_and_change_aa_profile = sub {
 	or die "failed to change apparmor profile (close() failed): $!\n";
 };
 
+# Hotplug the host block/character device $dev->{path} into the running container $vmid.
+# Reads the id map and 'unprivileged' from $conf and 'deny-write' from $dev; errors are die()d.
+# The mount part is done in a fork with an unshared mount namespace:
+#  -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start'
+#     with move_mount privileges on every mount.
+#  -) create the device node, then grab it, create a file to bind mount the device node onto in
+#     the container, switch to the container mount namespace, and move_mount the device node.
+sub device_passthrough_hotplug :prototype($$$) {
+    my ($vmid, $conf, $dev) = @_;
+
+    my ($mode, $rdev) = (stat($dev->{path}))[2, 6];
+    die "Could not get mode or device ID of $dev->{path}\n"
+	if (!defined($mode) || !defined($rdev));
+    die "$dev->{path} is not a block or character device\n"
+	if (!S_ISBLK($mode) && !S_ISCHR($mode));
+
+    PVE::Tools::run_fork(sub {
+	# Pin the container pid longer, we also need to get its monitor/parent:
+	my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
+	    or die "failed to open pidfd of container $vmid\'s init process\n";
+	my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid)
+	    or die "failed to open pidfd of container $vmid\'s monitor process\n";
+
+	my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt');
+	my $ct_user_ns = $get_container_namespace->($vmid, $ct_pid, 'user');
+	my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt');
+
+	# Enter monitor mount namespace and switch to 'pve-container-mounthotplug' apparmor profile.
+	$enter_mnt_ns_and_change_aa_profile->($monitor_mnt_ns, "pve-container-mounthotplug", undef);
+
+	my $id_map = (PVE::LXC::parse_id_maps($conf))[0];
+	my $passthrough_device_path = create_passthrough_device_node(
+	    "/var/lib/lxc/$vmid/passthrough", $dev, $mode, $rdev, $id_map);
+	my $srcfh = PVE::Tools::open_tree(&AT_FDCWD, $passthrough_device_path, &OPEN_TREE_CLOEXEC | &OPEN_TREE_CLONE)
+	    or die "open_tree() on passthrough device node failed: $!\n";
+
+	if ($conf->{unprivileged}) {
+	    PVE::Tools::setns(fileno($ct_user_ns), PVE::Tools::CLONE_NEWUSER)
+		or die "failed to enter user namespace of container $vmid: $!\n";
+	    # Become root inside the user namespace; abort instead of continuing if that fails.
+	    POSIX::setuid(0) or die "setuid(0) in user namespace of container $vmid failed: $!\n";
+	    POSIX::setgid(0) or die "setgid(0) in user namespace of container $vmid failed: $!\n";
+	}
+
+	# Create a regular file in the container to bind mount the device node onto.
+	sysopen(my $dstfh, "/proc/$ct_pid/root$dev->{path}", O_CREAT)
+	    or die "failed to open '/proc/$ct_pid/root$dev->{path}': $!\n";
+
+	# Enter the container mount namespace
+	PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS)
+	    or die "failed to enter mount namespace of container $vmid: $!\n";
+	chdir('/')
+	    or die "failed to change root directory within the container's mount namespace: $!\n";
+
+	# Bind mount the device node into the container
+	PVE::Tools::move_mount(fileno($srcfh), '', fileno($dstfh), '', &MOVE_MOUNT_F_EMPTY_PATH | &MOVE_MOUNT_T_EMPTY_PATH)
+	    or die "move_mount failed: $!\n";
+    });
+
+    # Allow or deny device access with cgroup2
+    my $major = PVE::Tools::dev_t_major($rdev);
+    my $minor = PVE::Tools::dev_t_minor($rdev);
+    my $device_type = S_ISBLK($mode) ? 'b' : 'c';
+
+    run_command(["lxc-cgroup", "-n", $vmid, "devices.deny", "$device_type $major:$minor w"])
+	if ($dev->{'deny-write'});
+    my $allow_perms = $dev->{'deny-write'} ? 'r' : 'rw';
+    run_command([
+	"lxc-cgroup", "-n", $vmid, "devices.allow", "$device_type $major:$minor $allow_perms"
+    ]);
+}
+
 sub mountpoint_hotplug :prototype($$$$$) {
     my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
 
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 555767f..f6795a5 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -1535,6 +1535,13 @@ sub vmconfig_hotplug_pending {
 		$class->apply_pending_mountpoint($vmid, $conf, $opt, $storecfg, 1);
 		# apply_pending_mountpoint modifies the value if it creates a new disk
 		$value = $conf->{pending}->{$opt};
+	    } elsif ($opt =~ m/^dev(\d+)$/) {
+		if (exists($conf->{$opt})) {
+		    die "skip\n"; # don't try to hotplug over existing dev
+		}
+
+		$class->apply_pending_device_passthrough($vmid, $conf, $opt, 1);
+		$value = $conf->{pending}->{$opt};
 	    } else {
 		die "skip\n"; # skip non-hotpluggable
 	    }
@@ -1629,6 +1636,18 @@ my $rescan_volume = sub {
     warn "Could not rescan volume size - $@\n" if $@;
 };
 
+# Parse the pending device passthrough $opt and, if $running, hotplug it as a new device.
+sub apply_pending_device_passthrough {
+    my ($class, $vmid, $conf, $opt, $running) = @_;
+
+    my $device = $class->parse_device($conf->{pending}->{$opt});
+    if ($running) {
+	die "skip\n" if defined($conf->{$opt}); # TODO: editing a device passthrough
+	PVE::LXC::device_passthrough_hotplug($vmid, $conf, $device);
+	$conf->{pending}->{$opt} = $class->print_device($device);
+    }
+}
+
 sub apply_pending_mountpoint {
     my ($class, $vmid, $conf, $opt, $storecfg, $running) = @_;
 
-- 
2.39.5



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel