From: Filip Schauer <f.schauer@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH container v2 4/4] implement device hotplug
Date: Wed, 23 Apr 2025 14:56:40 +0200 [thread overview]
Message-ID: <20250423125640.88756-5-f.schauer@proxmox.com> (raw)
In-Reply-To: <20250423125640.88756-1-f.schauer@proxmox.com>
This only includes adding devices to a running container. Removing or
editing existing devices is still not implemented.
Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
src/PVE/LXC.pm | 74 ++++++++++++++++++++++++++++++++++++++++++-
src/PVE/LXC/Config.pm | 19 +++++++++++
2 files changed, 92 insertions(+), 1 deletion(-)
diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index d985b88..0c8c2e9 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -5,7 +5,7 @@ use warnings;
use Cwd qw();
use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
-use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
+use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY O_CREAT :mode);
use File::Basename;
use File::Path;
use File::Spec;
@@ -2065,6 +2065,78 @@ my $enter_mnt_ns_and_change_aa_profile = sub {
or die "failed to change apparmor profile (close() failed): $!\n";
};
+# Hotplug a passthrough device into the running container $vmid.
+#
+# $conf is the container configuration (used for the ID maps and the 'unprivileged' flag).
+# $dev is the parsed device entry; only its 'path' and 'deny-write' keys are read here.
+#
+# Dies if the device path cannot be stat()ed or is neither a block nor a character device.
+# Every step inside the fork dies on failure as well.
+sub device_passthrough_hotplug :prototype($$$) {
+    my ($vmid, $conf, $dev) = @_;
+
+    my ($mode, $rdev) = (stat($dev->{path}))[2, 6];
+
+    die "Could not get mode or device ID of $dev->{path}\n"
+        if (!defined($mode) || !defined($rdev));
+
+    # The cgroup rule below needs a definite device type, so refuse anything else up front
+    # instead of silently treating it as a character device.
+    die "$dev->{path} is not a block or character device\n"
+        if !S_ISBLK($mode) && !S_ISCHR($mode);
+
+    # We do the rest in a fork with an unshared mount namespace:
+    # -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start'
+    #    with move_mount privileges on every mount.
+    # -) create the device node, then grab it, create a file to bind mount the device node onto in
+    #    the container, switch to the container mount namespace, and move_mount the device node.
+
+    PVE::Tools::run_fork(sub {
+        # Pin the container pid longer, we also need to get its monitor/parent:
+        my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
+            or die "failed to open pidfd of container $vmid\'s init process\n";
+
+        my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid)
+            or die "failed to open pidfd of container $vmid\'s monitor process\n";
+
+        my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt');
+        my $ct_user_ns = $get_container_namespace->($vmid, $ct_pid, 'user');
+        my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt');
+
+        # Enter monitor mount namespace and switch to 'pve-container-mounthotplug' apparmor profile.
+        $enter_mnt_ns_and_change_aa_profile->($monitor_mnt_ns, "pve-container-mounthotplug", undef);
+
+        my $id_map = (PVE::LXC::parse_id_maps($conf))[0];
+        my $passthrough_device_path = create_passthrough_device_node(
+            "/var/lib/lxc/$vmid/passthrough", $dev, $mode, $rdev, $id_map);
+
+        # Detached clone of the device node that can later be moved into the container.
+        my $srcfh = PVE::Tools::open_tree(
+            &AT_FDCWD, $passthrough_device_path, &OPEN_TREE_CLOEXEC | &OPEN_TREE_CLONE)
+            or die "open_tree() on passthrough device node failed: $!\n";
+
+        if ($conf->{unprivileged}) {
+            PVE::Tools::setns(fileno($ct_user_ns), PVE::Tools::CLONE_NEWUSER)
+                or die "failed to enter user namespace of container $vmid: $!\n";
+
+            # Become root inside the container's user namespace. Check both calls so the
+            # mount target below is never created with an unexpected identity.
+            defined(POSIX::setuid(0))
+                or die "failed to set uid 0 in user namespace of container $vmid: $!\n";
+            defined(POSIX::setgid(0))
+                or die "failed to set gid 0 in user namespace of container $vmid: $!\n";
+        }
+
+        # Create a regular file in the container to bind mount the device node onto.
+        sysopen(my $dstfh, "/proc/$ct_pid/root$dev->{path}", O_RDONLY | O_CREAT)
+            or die "failed to open '/proc/$ct_pid/root$dev->{path}': $!\n";
+
+        # Enter the container mount namespace. This must not fail silently, otherwise the
+        # move_mount below would run in the wrong namespace.
+        PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS)
+            or die "failed to enter mount namespace of container $vmid: $!\n";
+        chdir('/')
+            or die "failed to change root directory within the container's mount namespace: $!\n";
+
+        # Bind mount the device node into the container
+        PVE::Tools::move_mount(
+            fileno($srcfh), '', fileno($dstfh), '',
+            &MOVE_MOUNT_F_EMPTY_PATH | &MOVE_MOUNT_T_EMPTY_PATH,
+        ) or die "move_mount failed: $!\n";
+    });
+
+    # Allow or deny device access with cgroup2
+    my $major = PVE::Tools::dev_t_major($rdev);
+    my $minor = PVE::Tools::dev_t_minor($rdev);
+    my $device_type = S_ISBLK($mode) ? 'b' : 'c';
+
+    # With 'deny-write' set, explicitly revoke write access and only allow reads afterwards.
+    run_command(["lxc-cgroup", "-n", $vmid, "devices.deny", "$device_type $major:$minor w"])
+        if ($dev->{'deny-write'});
+
+    my $allow_perms = $dev->{'deny-write'} ? 'r' : 'rw';
+    run_command([
+        "lxc-cgroup", "-n", $vmid, "devices.allow", "$device_type $major:$minor $allow_perms"
+    ]);
+}
+
sub mountpoint_hotplug :prototype($$$$$) {
my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 555767f..f6795a5 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -1535,6 +1535,13 @@ sub vmconfig_hotplug_pending {
$class->apply_pending_mountpoint($vmid, $conf, $opt, $storecfg, 1);
# apply_pending_mountpoint modifies the value if it creates a new disk
$value = $conf->{pending}->{$opt};
+ } elsif ($opt =~ m/^dev(\d+)$/) {
+ if (exists($conf->{$opt})) {
+ die "skip\n"; # don't try to hotplug over existing dev
+ }
+
+ $class->apply_pending_device_passthrough($vmid, $conf, $opt, 1);
+ $value = $conf->{pending}->{$opt};
} else {
die "skip\n"; # skip non-hotpluggable
}
@@ -1629,6 +1636,18 @@ my $rescan_volume = sub {
warn "Could not rescan volume size - $@\n" if $@;
};
+# Apply the pending device passthrough entry $opt of container $vmid.
+#
+# The pending value is always parsed. Only when $running is true is the device hotplugged and
+# the pending entry rewritten from the parsed device. Dies with "skip\n" if $opt already has a
+# current value, since replacing an existing passthrough is not supported yet.
+sub apply_pending_device_passthrough {
+    my ($class, $vmid, $conf, $opt, $running) = @_;
+
+    my $device = $class->parse_device($conf->{pending}{$opt});
+
+    if ($running) {
+        # TODO: editing a device passthrough
+        die "skip\n" if defined($conf->{$opt});
+
+        PVE::LXC::device_passthrough_hotplug($vmid, $conf, $device);
+        $conf->{pending}{$opt} = $class->print_device($device);
+    }
+}
+
sub apply_pending_mountpoint {
my ($class, $vmid, $conf, $opt, $storecfg, $running) = @_;
--
2.39.5
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
next prev parent reply other threads:[~2025-04-23 12:57 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-23 12:56 [pve-devel] [PATCH container v2 0/4] " Filip Schauer
2025-04-23 12:56 ` [pve-devel] [PATCH container v2 1/4] extract apparmor profile & namespace switch to a helper Filip Schauer
2025-04-23 12:56 ` [pve-devel] [PATCH container v2 2/4] extract passthrough device node creation " Filip Schauer
2025-05-30 14:18 ` Michael Köppl
2025-04-23 12:56 ` [pve-devel] [PATCH container v2 3/4] config: support printing a device Filip Schauer
2025-04-23 12:56 ` Filip Schauer [this message]
2025-05-30 14:18 ` [pve-devel] [PATCH container v2 4/4] implement device hotplug Michael Köppl
2025-06-02 11:40 ` Filip Schauer
2025-06-02 13:36 ` [pve-devel] superseded: [PATCH container v2 0/4] " Filip Schauer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250423125640.88756-5-f.schauer@proxmox.com \
--to=f.schauer@proxmox.com \
--cc=pve-devel@lists.proxmox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal