From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from firstgate.proxmox.com (firstgate.proxmox.com [212.224.123.68]) by lore.proxmox.com (Postfix) with ESMTPS id 216181FF141 for ; Mon, 30 Mar 2026 16:13:13 +0200 (CEST) Received: from firstgate.proxmox.com (localhost [127.0.0.1]) by firstgate.proxmox.com (Proxmox) with ESMTP id A649C33576; Mon, 30 Mar 2026 16:13:39 +0200 (CEST) From: Filip Schauer To: pve-devel@lists.proxmox.com Subject: [PATCH container v2 5/7] implement per-mountpoint uid/gid mapping Date: Mon, 30 Mar 2026 16:10:16 +0200 Message-ID: <20260330141021.151921-6-f.schauer@proxmox.com> X-Mailer: git-send-email 2.47.3 In-Reply-To: <20260330141021.151921-1-f.schauer@proxmox.com> References: <20260330141021.151921-1-f.schauer@proxmox.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Bm-Milter-Handled: 55990f41-d878-4baa-be0a-ee34c49e34d2 X-Bm-Transport-Timestamp: 1774879959501 X-SPAM-LEVEL: Spam detection results: 0 AWL 0.013 Adjusted score from AWL reputation of From: address BAYES_00 -1.9 Bayes spam probability is 0 to 1% DMARC_MISSING 0.1 Missing DMARC policy KAM_DMARC_STATUS 0.01 Test Rule for DKIM or SPF Failure with Strict Alignment SPF_HELO_NONE 0.001 SPF: HELO does not publish an SPF Record SPF_PASS -0.001 SPF: sender matches SPF record Message-ID-Hash: VEVMXYFYINY2MGMXG6MIZGLAVUX3IKVV X-Message-ID-Hash: VEVMXYFYINY2MGMXG6MIZGLAVUX3IKVV X-MailFrom: f.schauer@proxmox.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list List-Id: Proxmox VE development discussion List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: Add support for customizing UID/GID mappings on individual mount points without affecting the entire container. 
A new "idmap" mount point option accepts semicolon-separated mappings: ``` idmap=type:ct:disk:len;type:ct:disk:len;... ``` type: can be either 'u' or 'g' ct: ID as seen inside the container disk: corresponding ID on the underlying filesystem (disk) len: number of consecutive IDs to map Unmapped ranges inherit the container's ID mapping. Example to pass through the host UID & GID 1005: ``` mp0: /mnt/data,mp=/data,idmap=u:1005:1005:1;g:1005:1005:1 ``` To identity-map the entire range of IDs, "passthrough" can be used: ``` idmap=passthrough ``` Mount point idmapping only works for unprivileged containers. For privileged containers the option is ignored and a warning is logged. Signed-off-by: Filip Schauer --- src/PVE/LXC.pm | 96 +++++++++++++++++++++++++++++++++++++-- src/PVE/LXC/Config.pm | 39 ++++++++++++++++ src/lxc-pve-prestart-hook | 23 ++++++++++ 3 files changed, 153 insertions(+), 5 deletions(-) diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm index 6f0dec4..0285d72 100644 --- a/src/PVE/LXC.pm +++ b/src/PVE/LXC.pm @@ -11,6 +11,7 @@ use File::Path; use File::Spec; use IO::Poll qw(POLLIN POLLHUP); use IO::Socket::UNIX; +use List::Util qw(max min); use POSIX qw(EINTR); use Socket; use Time::HiRes qw (gettimeofday); @@ -43,6 +44,7 @@ use PVE::Syscall qw(:fsmount); use PVE::LXC::CGroup; use PVE::LXC::Config; use PVE::LXC::Monitor; +use PVE::LXC::Namespaces; use PVE::LXC::Tools; my $have_sdn; @@ -2470,7 +2472,24 @@ sub device_passthrough_hotplug : prototype($$$) { sub mountpoint_hotplug : prototype($$$$$) { my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_; - my (undef, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf); + # Pin the container pid longer, we also need to get its monitor/parent: + my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid) + or die "failed to open pidfd of container $vmid\'s init process\n"; + + my ($id_map, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf); + my $mp_userns_fh; + if ($mp->{idmap}) { + if (!@$id_map) { + PVE::RESTEnvironment::log_warn( + "'$opt' - ignoring 'idmap' option unsupported by 
privileged container"); + } elsif ($mp->{idmap} eq "passthrough") { + # Optimization: Reuse the container userns to avoid the overhead of creating a new ns + $mp_userns_fh = $get_container_namespace->($vmid, $ct_pid, 'user'); + } else { + my $mp_id_map = resolve_mountpoint_idmap($id_map, $mp); + $mp_userns_fh = PVE::LXC::Namespaces::new_userns($mp_id_map); + } + } # We do the rest in a fork with an unshared mount namespace: # -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start' @@ -2479,10 +2498,6 @@ sub mountpoint_hotplug : prototype($$$$$) { # namespace, then mount it. PVE::Tools::run_fork(sub { - # Pin the container pid longer, we also need to get its monitor/parent: - my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid) - or die "failed to open pidfd of container $vmid\'s init process\n"; - my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid) or die "failed to open pidfd of container $vmid\'s monitor process\n"; @@ -2506,6 +2521,18 @@ sub mountpoint_hotplug : prototype($$$$$) { my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef, $root_uid, $root_gid); + if ($mp_userns_fh) { + PVE::Tools::mount_setattr( + fileno($mount_fd), + '', + PVE::Tools::AT_EMPTY_PATH, + &PVE::Syscall::MOUNT_ATTR_IDMAP, + 0, + 0, + fileno($mp_userns_fh), + ) or die "mount_setattr: $!\n"; + } + PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS); chdir('/') or die "failed to change root directory within the container's mount namespace: $!\n"; @@ -3021,6 +3048,65 @@ sub map_ct_gid_to_host { return map_ct_id_to_host($gid, $id_map, 'g'); } +sub resolve_mountpoint_idmap { + my ($id_map, $mp) = @_; + + die "mount point does not specify an idmap\n" if !$mp->{idmap}; + + return $id_map if $mp->{idmap} eq "passthrough"; + + my $mp_ct_idmap = $mp->{idmap}; + validate_id_maps($mp_ct_idmap); + + # Convert the user friendly mp.idmap to the actual mapping to be applied via mount_setattr. 
+ # Provided by the config: + # lxc.idmap: ID in Container --> ID on Host + # mp.idmap: ID in Container --> ID on Disk + # + # Convert to: ID on Disk --> ID on Host + my $result = []; + for my $type ('u', 'g') { + my @ct_chunks = grep { $_->[0] eq $type } @$id_map; + next if !@ct_chunks; + + my @exceptions = sort { $a->[1] <=> $b->[1] } grep { $_->[0] eq $type } @$mp_ct_idmap; + + for my $chunk (@ct_chunks) { + my (undef, $ct_start, $host_start, $len) = @$chunk; + my $ct_end = $ct_start + $len; + + # Find exceptions that fall within this specific lxc.idmap chunk + my @chunk_exc = grep { $_->[1] < $ct_end && $_->[1] + $_->[3] > $ct_start } @exceptions; + push @chunk_exc, [$type, $ct_end, undef, 0]; # ensure the trailing gap is mapped + + my $ct = $ct_start; + for my $exc (@chunk_exc) { + my (undef, $exc_ct, $exc_disk, $exc_len) = @$exc; + + my $clamped_ct = max($exc_ct, $ct_start); + my $clamped_len = min($exc_ct + $exc_len, $ct_end) - $clamped_ct; + + # Identity mapping for unmapped ranges + if ($ct < $clamped_ct) { + my $host = $host_start + ($ct - $ct_start); + push @$result, [$type, $host, $host, $clamped_ct - $ct]; + } + + # Map the IDs on Disk to the Host IDs. 
+ if ($clamped_len > 0) { + my $disk = $exc_disk + $clamped_ct - $exc_ct; + my $host = $host_start + $clamped_ct - $ct_start; + push @$result, [$type, $disk, $host, $clamped_len]; + } + + $ct = $clamped_ct + $clamped_len; + } + } + } + + return $result; +} + sub userns_command { my ($id_map) = @_; if (@$id_map) { diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm index 5442586..924a98c 100644 --- a/src/PVE/LXC/Config.pm +++ b/src/PVE/LXC/Config.pm @@ -369,6 +369,27 @@ my $rootfs_desc = { format_description => 'opt[;opt...]', pattern => qr/$valid_mount_option_re(;$valid_mount_option_re)*/, }, + idmap => { + optional => 1, + type => 'string', + description => + 'Map specific container UIDs/GIDs to underlying disk UIDs/GIDs for this mount point', + verbose_description => + "Customize UID/GID mappings that override the container's `lxc.idmap` for this mount " + . "point. Accepts a semicolon-separated list of `type:container:disk:range-size` " + . "entries.\n" + . "`type` is `u` for UID or `g` for GID.\n" + . "`container` is the first ID as seen inside the container.\n" + . "`disk` is the first corresponding ID on the underlying filesystem.\n" + . "`range-size` is the number of consecutive IDs to map.\n" + . "Unmapped IDs fall back to the container's `lxc.idmap`.\n" + . "Example: `u:123:456:1` maps UID 123 in the container to UID 456 on the disk. " + . 
"Files owned by UID 456 on the disk will appear as UID 123 inside the container.", + format_description => + 'type:container:disk:range-size[;type:container:disk:range-size;...]', + pattern => + qr/^(?:passthrough|[ug]:[0-9]+:[0-9]+:[1-9][0-9]*(?:;[ug]:[0-9]+:[0-9]+:[1-9][0-9]*)*)$/, + }, ro => { type => 'boolean', description => 'Read-only mount point', @@ -1315,6 +1336,8 @@ sub update_pct_config { $class->check_protection($conf, "can't update CT $vmid drive '$opt'"); my $mp = $class->parse_volume($opt, $value); $check_content_type->($mp) if ($mp->{type} eq 'volume'); + PVE::LXC::validate_id_maps($mp->{idmap}) + if defined($mp->{idmap}) && $mp->{idmap} ne 'passthrough'; } elsif ($opt eq 'hookscript') { PVE::GuestHelpers::check_hookscript($value); } elsif ($opt eq 'nameserver') { @@ -1439,6 +1462,16 @@ my $parse_ct_mountpoint_full = sub { $res->{type} = $class->classify_mountpoint($res->{volume}); + if (defined($res->{idmap}) && $res->{idmap} ne 'passthrough') { + my $mp_ct_idmap = []; + for my $entry (split(';', $res->{idmap})) { + $entry =~ /^([ug]):(\d+):(\d+):(\d+)$/ + or die "failed to parse mount point idmap: $entry\n"; + push @$mp_ct_idmap, [$1, $2, $3, $4]; + } + $res->{idmap} = $mp_ct_idmap; + } + return $res; }; @@ -1446,6 +1479,12 @@ sub print_ct_mountpoint { my ($class, $info, $nomp) = @_; my $skip = ['type']; push @$skip, 'mp' if $nomp; + + if (defined($info->{idmap}) && $info->{idmap} ne 'passthrough') { + $info = {%$info}; # Shallow copy to avoid mutating the caller's hashref + $info->{idmap} = join ';', map { join ':', @$_ } @{ $info->{idmap} }; + } + return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip); } diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook index 9862509..2bfce31 100755 --- a/src/lxc-pve-prestart-hook +++ b/src/lxc-pve-prestart-hook @@ -87,6 +87,7 @@ PVE::LXC::Tools::lxc_hook( }; my $rootdir_fd = undef; + my $userns_cache = {}; my $setup_mountpoint = sub { my ($opt, $mountpoint) = @_; @@ -95,6 +96,28 
@@ PVE::LXC::Tools::lxc_hook( $mountpoint, $dir, $storage_cfg, undef, $root_uid, $root_gid, ); + if ($mountpoint->{idmap}) { + if (@$id_map) { + my $mp_id_map = PVE::LXC::resolve_mountpoint_idmap($id_map, $mountpoint); + my $cache_key = join(';', map { join(':', @$_) } @$mp_id_map); + my $usernsfh = $userns_cache->{$cache_key} //= + PVE::LXC::Namespaces::new_userns($mp_id_map); + + PVE::Tools::mount_setattr( + fileno($mount_fd), + '', + PVE::Tools::AT_EMPTY_PATH, + &PVE::Syscall::MOUNT_ATTR_IDMAP, + 0, + 0, + fileno($usernsfh), + ) or die "mount_setattr: $!\n"; + } else { + $log_warn->( + "'$opt' - ignoring 'idmap' option unsupported by privileged container"); + } + } + my ($dest_dir, $dest_base_fd, $keep_attrs); if ($rootdir_fd) { # Mount relative to the rootdir fd. -- 2.47.3