public inbox for pve-devel@lists.proxmox.com
 help / color / mirror / Atom feed
From: Filip Schauer <f.schauer@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [PATCH container v2 5/7] implement per-mountpoint uid/gid mapping
Date: Mon, 30 Mar 2026 16:10:16 +0200	[thread overview]
Message-ID: <20260330141021.151921-6-f.schauer@proxmox.com> (raw)
In-Reply-To: <20260330141021.151921-1-f.schauer@proxmox.com>

Add support for customizing UID/GID mappings on individual mount points
without affecting the entire container.

A new "idmap" mount point option accepts semicolon-separated mappings:
```
idmap=type:ct:host:len;type:ct:host:len;...
```

type: can be either 'u' or 'g'
ct: ID as seen inside the container
host: corresponding ID on the host
len: number of consecutive IDs to map

Unmapped ranges inherit the container's ID mapping.

Example to pass through the host UID & GID 1005:
```
mp0: /mnt/data,mp=/data,idmap=u:1005:1005:1;g:1005:1005:1
```

To identity-map the entire range of ids, "passthrough" can be used:
```
idmap=passthrough
```

Mount point idmapping only works for unprivileged containers. Privileged
containers are unaffected.

Signed-off-by: Filip Schauer <f.schauer@proxmox.com>
---
 src/PVE/LXC.pm            | 96 +++++++++++++++++++++++++++++++++++++--
 src/PVE/LXC/Config.pm     | 39 ++++++++++++++++
 src/lxc-pve-prestart-hook | 23 ++++++++++
 3 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 6f0dec4..0285d72 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -11,6 +11,7 @@ use File::Path;
 use File::Spec;
 use IO::Poll qw(POLLIN POLLHUP);
 use IO::Socket::UNIX;
+use List::Util qw(max min);
 use POSIX qw(EINTR);
 use Socket;
 use Time::HiRes qw (gettimeofday);
@@ -43,6 +44,7 @@ use PVE::Syscall qw(:fsmount);
 use PVE::LXC::CGroup;
 use PVE::LXC::Config;
 use PVE::LXC::Monitor;
+use PVE::LXC::Namespaces;
 use PVE::LXC::Tools;
 
 my $have_sdn;
@@ -2470,7 +2472,24 @@ sub device_passthrough_hotplug : prototype($$$) {
 sub mountpoint_hotplug : prototype($$$$$) {
     my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
 
-    my (undef, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);
+    # Pin the container pid longer, we also need to get its monitor/parent:
+    my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
+        or die "failed to open pidfd of container $vmid\'s init process\n";
+
+    my ($id_map, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);
+    my $mp_userns_fh;
+    if ($mp->{idmap}) {
+        if (!@$id_map) {
+            PVE::RESTEnvironment::log_warn(
+                "'$opt' - ignoring 'idmap' option unsupported by privileged container");
+        } elsif ($mp->{idmap} eq "passthrough") {
+            # Optimization: Reuse the container userns to avoid the overhead of creating a new ns
+            $mp_userns_fh = $get_container_namespace->($vmid, $ct_pid, 'user');
+        } else {
+            my $mp_id_map = resolve_mountpoint_idmap($id_map, $mp);
+            $mp_userns_fh = PVE::LXC::Namespaces::new_userns($mp_id_map);
+        }
+    }
 
     # We do the rest in a fork with an unshared mount namespace:
     #  -) change our apparmor profile to 'pve-container-mounthotplug', which is '/usr/bin/lxc-start'
@@ -2479,10 +2498,6 @@ sub mountpoint_hotplug : prototype($$$$$) {
     #     namespace, then mount it.
 
     PVE::Tools::run_fork(sub {
-        # Pin the container pid longer, we also need to get its monitor/parent:
-        my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
-            or die "failed to open pidfd of container $vmid\'s init process\n";
-
         my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid)
             or die "failed to open pidfd of container $vmid\'s monitor process\n";
 
@@ -2506,6 +2521,18 @@ sub mountpoint_hotplug : prototype($$$$$) {
 
         my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef, $root_uid, $root_gid);
 
+        if ($mp_userns_fh) {
+            PVE::Tools::mount_setattr(
+                fileno($mount_fd),
+                '',
+                PVE::Tools::AT_EMPTY_PATH,
+                &PVE::Syscall::MOUNT_ATTR_IDMAP,
+                0,
+                0,
+                fileno($mp_userns_fh),
+            ) or die "mount_setattr: $!\n";
+        }
+
         PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS);
         chdir('/')
             or die "failed to change root directory within the container's mount namespace: $!\n";
@@ -3021,6 +3048,65 @@ sub map_ct_gid_to_host {
     return map_ct_id_to_host($gid, $id_map, 'g');
 }
 
+sub resolve_mountpoint_idmap {
+    my ($id_map, $mp) = @_;
+
+    die "mount point does not specify an idmap\n" if !$mp->{idmap};
+
+    return $id_map if $mp->{idmap} eq "passthrough";
+
+    my $mp_ct_idmap = $mp->{idmap};
+    validate_id_maps($mp_ct_idmap);
+
+    # Convert the user friendly mp.idmap to the actual mapping to be applied via mount_setattr.
+    # Provided by the config:
+    #   lxc.idmap:    ID in Container --> ID on Host
+    #    mp.idmap:    ID in Container --> ID on Disk
+    #
+    # Convert to:          ID on Disk --> ID on Host
+    my $result = [];
+    for my $type ('u', 'g') {
+        my @ct_chunks = grep { $_->[0] eq $type } @$id_map;
+        next if !@ct_chunks;
+
+        my @exceptions = sort { $a->[1] <=> $b->[1] } grep { $_->[0] eq $type } @$mp_ct_idmap;
+
+        for my $chunk (@ct_chunks) {
+            my (undef, $ct_start, $host_start, $len) = @$chunk;
+            my $ct_end = $ct_start + $len;
+
+            # Find exceptions that fall within this specific lxc.idmap chunk
+            my @chunk_exc = grep { $_->[1] < $ct_end && $_->[1] + $_->[3] > $ct_start } @exceptions;
+            push @chunk_exc, [$type, $ct_end, undef, 0]; # ensure the trailing gap is mapped
+
+            my $ct = $ct_start;
+            for my $exc (@chunk_exc) {
+                my (undef, $exc_ct, $exc_disk, $exc_len) = @$exc;
+
+                my $clamped_ct = max($exc_ct, $ct_start);
+                my $clamped_len = min($exc_ct + $exc_len, $ct_end) - $clamped_ct;
+
+                # Identity mapping for unmapped ranges
+                if ($ct < $clamped_ct) {
+                    my $host = $host_start + ($ct - $ct_start);
+                    push @$result, [$type, $host, $host, $clamped_ct - $ct];
+                }
+
+                # Map the IDs on Disk to the Host IDs.
+                if ($clamped_len > 0) {
+                    my $disk = $exc_disk + $clamped_ct - $exc_ct;
+                    my $host = $host_start + $clamped_ct - $ct_start;
+                    push @$result, [$type, $disk, $host, $clamped_len];
+                }
+
+                $ct = $clamped_ct + $clamped_len;
+            }
+        }
+    }
+
+    return $result;
+}
+
 sub userns_command {
     my ($id_map) = @_;
     if (@$id_map) {
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 5442586..924a98c 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -369,6 +369,27 @@ my $rootfs_desc = {
         format_description => 'opt[;opt...]',
         pattern => qr/$valid_mount_option_re(;$valid_mount_option_re)*/,
     },
+    idmap => {
+        optional => 1,
+        type => 'string',
+        description =>
+            'Map specific container UIDs/GIDs to underlying disk UIDs/GIDs for this mount point',
+        verbose_description =>
+            "Customize UID/GID mappings that override the container's `lxc.idmap` for this mount "
+            . "point. Accepts a semicolon-separated list of `type:container:disk:range-size` "
+            . "entries.\n"
+            . "`type` is `u` for UID or `g` for GID.\n"
+            . "`container` is the first ID as seen inside the container.\n"
+            . "`disk` is the first corresponding ID on the underlying filesystem.\n"
+            . "`range-size` is the number of consecutive IDs to map.\n"
+            . "Unmapped IDs fall back to the container's `lxc.idmap`.\n"
+            . "Example: `u:123:456:1` maps UID 123 in the container to UID 456 on the disk. "
+            . "Files owned by UID 456 on the disk will appear as UID 123 inside the container.",
+        format_description =>
+            'type:container:disk:range-size[;type:container:disk:range-size;...]',
+        pattern =>
+            qr/^(?:passthrough|[ug]:[0-9]+:[0-9]+:[1-9][0-9]*(?:;[ug]:[0-9]+:[0-9]+:[1-9][0-9]*)*)$/,
+    },
     ro => {
         type => 'boolean',
         description => 'Read-only mount point',
@@ -1315,6 +1336,8 @@ sub update_pct_config {
             $class->check_protection($conf, "can't update CT $vmid drive '$opt'");
             my $mp = $class->parse_volume($opt, $value);
             $check_content_type->($mp) if ($mp->{type} eq 'volume');
+            PVE::LXC::validate_id_maps($mp->{idmap})
+                if defined($mp->{idmap}) && $mp->{idmap} ne 'passthrough';
         } elsif ($opt eq 'hookscript') {
             PVE::GuestHelpers::check_hookscript($value);
         } elsif ($opt eq 'nameserver') {
@@ -1439,6 +1462,16 @@ my $parse_ct_mountpoint_full = sub {
 
     $res->{type} = $class->classify_mountpoint($res->{volume});
 
+    if (defined($res->{idmap}) && $res->{idmap} ne 'passthrough') {
+        my $mp_ct_idmap = [];
+        for my $entry (split(';', $res->{idmap})) {
+            $entry =~ /^([ug]):(\d+):(\d+):(\d+)$/
+                or die "failed to parse mount point idmap: $entry\n";
+            push @$mp_ct_idmap, [$1, $2, $3, $4];
+        }
+        $res->{idmap} = $mp_ct_idmap;
+    }
+
     return $res;
 };
 
@@ -1446,6 +1479,12 @@ sub print_ct_mountpoint {
     my ($class, $info, $nomp) = @_;
     my $skip = ['type'];
     push @$skip, 'mp' if $nomp;
+
+    if (defined($info->{idmap}) && $info->{idmap} ne 'passthrough') {
+        $info = {%$info}; # Shallow copy to avoid mutating the caller's hashref
+        $info->{idmap} = join ';', map { join ':', @$_ } @{ $info->{idmap} };
+    }
+
     return PVE::JSONSchema::print_property_string($info, $mp_desc, $skip);
 }
 
diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index 9862509..2bfce31 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -87,6 +87,7 @@ PVE::LXC::Tools::lxc_hook(
         };
 
         my $rootdir_fd = undef;
+        my $userns_cache = {};
         my $setup_mountpoint = sub {
             my ($opt, $mountpoint) = @_;
 
@@ -95,6 +96,28 @@ PVE::LXC::Tools::lxc_hook(
                 $mountpoint, $dir, $storage_cfg, undef, $root_uid, $root_gid,
             );
 
+            if ($mountpoint->{idmap}) {
+                if (@$id_map) {
+                    my $mp_id_map = PVE::LXC::resolve_mountpoint_idmap($id_map, $mountpoint);
+                    my $cache_key = join(';', map { join(':', @$_) } @$mp_id_map);
+                    my $usernsfh = $userns_cache->{$cache_key} //=
+                        PVE::LXC::Namespaces::new_userns($mp_id_map);
+
+                    PVE::Tools::mount_setattr(
+                        fileno($mount_fd),
+                        '',
+                        PVE::Tools::AT_EMPTY_PATH,
+                        &PVE::Syscall::MOUNT_ATTR_IDMAP,
+                        0,
+                        0,
+                        fileno($usernsfh),
+                    ) or die "mount_setattr: $!\n";
+                } else {
+                    $log_warn->(
+                        "'$opt' - ignoring 'idmap' option unsupported by privileged container");
+                }
+            }
+
             my ($dest_dir, $dest_base_fd, $keep_attrs);
             if ($rootdir_fd) {
                 # Mount relative to the rootdir fd.
-- 
2.47.3





  parent reply	other threads:[~2026-03-30 14:13 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-30 14:10 [PATCH container/manager v2 0/7] " Filip Schauer
2026-03-30 14:10 ` [PATCH container v2 1/7] namespaces: relax prototype of run_in_userns Filip Schauer
2026-03-30 14:10 ` [PATCH container v2 2/7] namespaces: refactor run_in_userns Filip Schauer
2026-03-30 14:10 ` [PATCH container v2 3/7] d/control: update versioned dependency for libpve-common-perl Filip Schauer
2026-03-30 14:10 ` [PATCH container v2 4/7] namespaces: add helper to create user namespace from idmap Filip Schauer
2026-03-30 14:10 ` Filip Schauer [this message]
2026-03-30 14:10 ` [PATCH manager v2 6/7] ui: lxc/MPEdit: remove duplicate "mp" assignment Filip Schauer
2026-03-30 14:10 ` [PATCH manager v2 7/7] ui: lxc/MPEdit: add "idmap" option Filip Schauer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260330141021.151921-6-f.schauer@proxmox.com \
    --to=f.schauer@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal