public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Filip Schauer <f.schauer@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH container v4 4/4] implement device hotplug
Date: Mon, 28 Jul 2025 14:47:52 +0200	[thread overview]
Message-ID: <20250728124800.96685-5-f.schauer@proxmox.com> (raw)
In-Reply-To: <20250728124800.96685-1-f.schauer@proxmox.com>

This only includes adding devices to a running container. Removing or
editing existing devices is still not implemented.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 src/PVE/LXC.pm        | 84 ++++++++++++++++++++++++++++++++++++++++++-
 src/PVE/LXC/Config.pm | 19 ++++++++++
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index e5c0714..63fb5d1 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -5,7 +5,7 @@ use warnings;
 
 use Cwd qw();
 use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY O_CREAT :mode);
 use File::Basename;
 use File::Path;
 use File::Spec;
@@ -2178,6 +2178,88 @@ my $enter_mnt_ns_and_change_aa_profile = sub {
         or die "failed to change apparmor profile (close() failed): $!\n";
 };
 
+# Hotplug the device described by $dev (a parsed 'devN' entry) into the running container $vmid.
+# Reads $dev->{path} and $dev->{'deny-write'}; $conf provides the ID maps and 'unprivileged' flag.
+sub device_passthrough_hotplug : prototype($$$) {
+    my ($vmid, $conf, $dev) = @_;
+
+    my ($mode, $rdev) = PVE::LXC::Tools::get_device_mode_and_rdev($dev->{path});
+    my $device_type = S_ISBLK($mode) ? 'b' : 'c';
+    my $major = PVE::Tools::dev_t_major($rdev);
+    my $minor = PVE::Tools::dev_t_minor($rdev);
+    my $cgroup_dev = "$device_type $major:$minor";
+
+    # We do the rest in a fork with an unshared mount namespace:
+    #  -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start'
+    #     with move_mount privileges on every mount.
+    #  -) create the device node, then grab it, create a file to bind mount the device node onto in
+    #     the container, switch to the container mount namespace, and move_mount the device node.
+
+    PVE::Tools::run_fork(sub {
+        # Pin the container pid longer, we also need to get its monitor/parent:
+        my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
+            or die "failed to open pidfd of container $vmid\'s init process\n";
+
+        my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid)
+            or die "failed to open pidfd of container $vmid\'s monitor process\n";
+
+        my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt');
+        my $ct_user_ns = $get_container_namespace->($vmid, $ct_pid, 'user');
+        my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt');
+
+        # Enter monitor mount namespace and switch to 'pve-container-mounthotplug' apparmor profile.
+        $enter_mnt_ns_and_change_aa_profile->(
+            $monitor_mnt_ns, "pve-container-mounthotplug", undef,
+        );
+
+        my $id_map = (PVE::LXC::parse_id_maps($conf))[0];
+        my $passthrough_device_path = create_passthrough_device_node(
+            "/var/lib/lxc/$vmid/passthrough",
+            $dev, $mode, $rdev, $id_map,
+        );
+
+        my $srcfh = PVE::Tools::open_tree(
+            &AT_FDCWD,
+            $passthrough_device_path,
+            &OPEN_TREE_CLOEXEC | &OPEN_TREE_CLONE,
+        ) or die "open_tree() on passthrough device node failed: $!\n";
+
+        if ($conf->{unprivileged}) {
+            PVE::Tools::setns(fileno($ct_user_ns), PVE::Tools::CLONE_NEWUSER)
+                or die "failed to enter user namespace of container $vmid: $!\n";
+
+            # NOTE(review): setuid/setgid results are unchecked here - confirm this is intended.
+            POSIX::setuid(0);
+            POSIX::setgid(0);
+        }
+
+        # Create a regular file in the container to bind mount the device node onto.
+        my $device_path = "/proc/$ct_pid/root$dev->{path}";
+        File::Path::make_path(dirname($device_path));
+        sysopen(my $dstfh, $device_path, O_CREAT)
+            or die "failed to create '$device_path': $!\n";
+
+        # Enter the container mount namespace; fail early, as move_mount below relies on it.
+        PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS)
+            or die "failed to enter mount namespace of container $vmid: $!\n";
+        chdir('/')
+            or die "failed to change directory within the container's mount namespace: $!\n";
+
+        # Bind mount the device node into the container
+        PVE::Tools::move_mount(
+            fileno($srcfh), '', fileno($dstfh), '',
+            &MOVE_MOUNT_F_EMPTY_PATH | &MOVE_MOUNT_T_EMPTY_PATH,
+        ) or die "move_mount failed: $!\n";
+    });
+
+    # Allow or deny device access with cgroup2
+    run_command(["lxc-cgroup", "-n", $vmid, "devices.deny", "$cgroup_dev w"])
+        if ($dev->{'deny-write'});
+
+    my $allow_perms = $dev->{'deny-write'} ? 'r' : 'rw';
+    run_command(["lxc-cgroup", "-n", $vmid, "devices.allow", "$cgroup_dev $allow_perms"]);
+}
+
 sub mountpoint_hotplug : prototype($$$$$) {
     my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
 
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 7aa6263..de963bc 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -1613,6 +1613,13 @@ sub vmconfig_hotplug_pending {
                 $class->apply_pending_mountpoint($vmid, $conf, $opt, $storecfg, 1);
                 # apply_pending_mountpoint modifies the value if it creates a new disk
                 $value = $conf->{pending}->{$opt};
+            } elsif ($opt =~ m/^dev(\d+)$/) {
+                if (exists($conf->{$opt})) {
+                    die "skip\n"; # don't try to hotplug over existing dev
+                }
+
+                $class->apply_pending_device_passthrough($vmid, $conf, $opt, 1);
+                $value = $conf->{pending}->{$opt};
             } else {
                 die "skip\n"; # skip non-hotpluggable
             }
@@ -1732,6 +1739,18 @@ my $rescan_volume = sub {
     warn "Could not rescan volume size - $@\n" if $@;
 };
 
+# Parse pending device entry $opt and, only if $running, hotplug it and re-print the entry.
+sub apply_pending_device_passthrough {
+    my ($class, $vmid, $conf, $opt, $running) = @_;
+
+    my $pending_dev = $class->parse_device($conf->{pending}->{$opt});
+    if ($running) {
+        die "skip\n" if defined($conf->{$opt}); # TODO: editing a device passthrough
+        PVE::LXC::device_passthrough_hotplug($vmid, $conf, $pending_dev);
+        $conf->{pending}->{$opt} = $class->print_device($pending_dev);
+    }
+}
+
 sub apply_pending_mountpoint {
     my ($class, $vmid, $conf, $opt, $storecfg, $running) = @_;
 
-- 
2.47.2



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel


  parent reply	other threads:[~2025-07-28 12:47 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-28 12:47 [pve-devel] [PATCH container v4 0/4] " Filip Schauer
2025-07-28 12:47 ` [pve-devel] [PATCH container v4 1/4] extract apparmor profile & namespace switch to a helper Filip Schauer
2025-07-28 12:47 ` [pve-devel] [PATCH container v4 2/4] extract passthrough device node creation " Filip Schauer
2025-07-28 12:47 ` [pve-devel] [PATCH container v4 3/4] config: support printing a device Filip Schauer
2025-07-28 12:47 ` Filip Schauer [this message]
2025-07-30 10:59   ` [pve-devel] [PATCH container v4 4/4] implement device hotplug Thomas Lamprecht
2025-07-30 12:12     ` Filip Schauer
2025-07-30 11:40 ` [pve-devel] applied-series: [PATCH container v4 0/4] " Thomas Lamprecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250728124800.96685-5-f.schauer@proxmox.com \
    --to=f.schauer@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal