all lists on lists.proxmox.com
 help / color / mirror / Atom feed
From: Stoiko Ivanov <s.ivanov@proxmox.com>
To: pve-devel@lists.proxmox.com
Subject: [pve-devel] [PATCH lxc] add patches for cgroup handling in non-unified cgroup setups
Date: Tue, 20 Jul 2021 12:52:00 +0200	[thread overview]
Message-ID: <20210720105200.3910274-1-s.ivanov@proxmox.com> (raw)

I opened a PR at lxc-upstream with these changes [0].

Testing in my hybrid layout environment fixes the issue with
priviledged container reported in the forum.
(Note that the issue also occurs with unprivileged container, if they
have a `lxc.cgroup.devices.(allow|deny)` entry (which they don't in
our default config)

[0] https://github.com/lxc/lxc/pull/3911

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
I quickly considered also updating lxc to 4.0.10 (which was released last
friday, but ran into some issues with mounting /sys/devices/virtual/net,
which I did not yet get to looking at.

 ...populate-hierarchy-for-device-cgroup.patch | 102 ++++++++++++++++++
 ...nneeded-variables-from-cgroup_tree_c.patch |  65 +++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
 create mode 100644 debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch

diff --git a/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch b/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
new file mode 100644
index 0000000..d24e45c
--- /dev/null
+++ b/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
@@ -0,0 +1,102 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stoiko Ivanov <s.ivanov@proxmox.com>
+Date: Mon, 19 Jul 2021 16:55:43 +0200
+Subject: [PATCH] cgroups: populate hierarchy for device cgroup
+
+With the changes introduced in:
+b7b1e3a34ce28b01206c48227930ff83d399e7b6
+the hierarchy-struct did not have the path_lim set anymore, which is
+needed by setup_limits_legacy to actually access the cgroup directory.
+
+The issue can be reproduced with a container config having
+```
+lxc.cgroup.devices.deny = a
+```
+(or any lxc.cgroup.devices entry) set on a system booted with
+systemd.unified_cgroup_hierarchy=0.
+
+This affects all privileged containers on PVE (due to the default
+devices.deny entry).
+
+Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
+---
+ src/lxc/cgroups/cgfsng.c | 39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index 9e1ece5ca..e27baa625 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -794,8 +794,12 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (fd_limit < 0)
+ 			return syserror_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 
++		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++		h->dfd_lim = move_fd(fd_limit);
++		h->path_lim = move_ptr(limit_path);
++
+ 		TRACE("Created limit cgroup %d->%d(%s)",
+-		      fd_limit, h->dfd_base, cgroup_limit_dir);
++		      h->dfd_lim, h->dfd_base, cgroup_limit_dir);
+ 
+ 		/*
+ 		 * With isolation the devices legacy cgroup needs to be
+@@ -807,44 +811,39 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		    !ops->setup_limits_legacy(ops, conf, true))
+ 			return log_error(false, "Failed to setup legacy device limits");
+ 
+-		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+-		path = must_make_path(limit_path, cgroup_leaf, NULL);
++		path = must_make_path(h->path_lim, cgroup_leaf, NULL);
+ 
+ 		/*
+ 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
+ 		 * cgroup the container actually resides in, is below fd_limit.
+ 		 */
+-		fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
++		fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
+ 		if (fd_final < 0) {
+ 			/* Ensure we don't leave any garbage behind. */
+ 			if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
+ 				SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 			else
+ 				TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
++			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 		}
++		h->dfd_con = move_fd(fd_final);
++		h->path_con = move_ptr(path);
++
+ 	} else {
+ 		path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ 
+ 		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+-	}
+-	if (fd_final < 0)
+-		return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+-
+-	if (payload) {
+-		h->dfd_con = move_fd(fd_final);
+-		h->path_con = move_ptr(path);
++		if (fd_final < 0)
++			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 
+-		if (fd_limit < 0)
++		if (payload) {
++			h->dfd_con = move_fd(fd_final);
+ 			h->dfd_lim = h->dfd_con;
+-		else
+-			h->dfd_lim = move_fd(fd_limit);
+-
+-		if (limit_path)
+-			h->path_lim = move_ptr(limit_path);
+-		else
++			h->path_con = move_ptr(path);
+ 			h->path_lim = h->path_con;
+-	} else {
+-		h->dfd_mon = move_fd(fd_final);
++		} else {
++			h->dfd_mon = move_fd(fd_final);
++		}
+ 	}
+ 
+ 	return true;
diff --git a/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch b/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch
new file mode 100644
index 0000000..692233d
--- /dev/null
+++ b/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch
@@ -0,0 +1,65 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stoiko Ivanov <s.ivanov@proxmox.com>
+Date: Tue, 20 Jul 2021 10:30:36 +0200
+Subject: [PATCH] cgroups: remove unneeded variables from cgroup_tree_create
+
+Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
+---
+ src/lxc/cgroups/cgfsng.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index e27baa625..35ba0fb9d 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -779,7 +779,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 			       const char *cgroup_leaf, bool payload)
+ {
+ 	__do_close int fd_limit = -EBADF, fd_final = -EBADF;
+-	__do_free char *path = NULL, *limit_path = NULL;
+ 	bool cpuset_v1 = false;
+ 
+ 	/*
+@@ -794,9 +793,8 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (fd_limit < 0)
+ 			return syserror_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 
+-		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++		h->path_lim = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ 		h->dfd_lim = move_fd(fd_limit);
+-		h->path_lim = move_ptr(limit_path);
+ 
+ 		TRACE("Created limit cgroup %d->%d(%s)",
+ 		      h->dfd_lim, h->dfd_base, cgroup_limit_dir);
+@@ -811,8 +809,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		    !ops->setup_limits_legacy(ops, conf, true))
+ 			return log_error(false, "Failed to setup legacy device limits");
+ 
+-		path = must_make_path(h->path_lim, cgroup_leaf, NULL);
+-
+ 		/*
+ 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
+ 		 * cgroup the container actually resides in, is below fd_limit.
+@@ -827,11 +823,9 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 		}
+ 		h->dfd_con = move_fd(fd_final);
+-		h->path_con = move_ptr(path);
++		h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
+ 
+ 	} else {
+-		path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+-
+ 		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+ 		if (fd_final < 0)
+ 			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+@@ -839,7 +833,8 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (payload) {
+ 			h->dfd_con = move_fd(fd_final);
+ 			h->dfd_lim = h->dfd_con;
+-			h->path_con = move_ptr(path);
++			h->path_con = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++
+ 			h->path_lim = h->path_con;
+ 		} else {
+ 			h->dfd_mon = move_fd(fd_final);
-- 
2.30.2





             reply	other threads:[~2021-07-20 10:52 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-20 10:52 Stoiko Ivanov [this message]
2021-07-20 11:39 ` Thomas Lamprecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210720105200.3910274-1-s.ivanov@proxmox.com \
    --to=s.ivanov@proxmox.com \
    --cc=pve-devel@lists.proxmox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.
Service provided by Proxmox Server Solutions GmbH | Privacy | Legal