summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/basic/cgroup-util.c846
-rw-r--r--src/basic/cgroup-util.h26
-rw-r--r--src/core/cgroup.c1
-rw-r--r--src/core/execute.c1
-rw-r--r--src/core/load-fragment.c2
-rw-r--r--src/core/mount-setup.c1
-rw-r--r--src/core/unit.c1
-rw-r--r--src/login/pam_systemd.c2
-rw-r--r--src/nspawn/nspawn-cgroup.c1
-rw-r--r--src/shared/bus-unit-util.c1
-rw-r--r--src/shared/cgroup-setup.c860
-rw-r--r--src/shared/cgroup-setup.h34
-rw-r--r--src/shared/meson.build2
-rw-r--r--src/shutdown/shutdown.c1
-rw-r--r--src/test/meson.build4
-rw-r--r--src/test/test-cgroup-setup.c67
-rw-r--r--src/test/test-cgroup-util.c52
-rw-r--r--src/test/test-cgroup.c1
-rw-r--r--src/test/test-helper.c2
19 files changed, 978 insertions, 927 deletions
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index 2865cd518e..6582b92d67 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -31,7 +31,6 @@
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
-#include "proc-cmdline.h"
#include "process-util.h"
#include "set.h"
#include "special.h"
@@ -410,173 +409,6 @@ int cg_kill_recursive(
return ret;
}
-int cg_migrate(
- const char *cfrom,
- const char *pfrom,
- const char *cto,
- const char *pto,
- CGroupFlags flags) {
-
- bool done = false;
- _cleanup_set_free_ Set *s = NULL;
- int r, ret = 0;
- pid_t my_pid;
-
- assert(cfrom);
- assert(pfrom);
- assert(cto);
- assert(pto);
-
- s = set_new(NULL);
- if (!s)
- return -ENOMEM;
-
- my_pid = getpid_cached();
-
- do {
- _cleanup_fclose_ FILE *f = NULL;
- pid_t pid = 0;
- done = true;
-
- r = cg_enumerate_processes(cfrom, pfrom, &f);
- if (r < 0) {
- if (ret >= 0 && r != -ENOENT)
- return r;
-
- return ret;
- }
-
- while ((r = cg_read_pid(f, &pid)) > 0) {
-
- /* This might do weird stuff if we aren't a
- * single-threaded program. However, we
- * luckily know we are not */
- if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
- continue;
-
- if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
- continue;
-
- /* Ignore kernel threads. Since they can only
- * exist in the root cgroup, we only check for
- * them there. */
- if (cfrom &&
- empty_or_root(pfrom) &&
- is_kernel_thread(pid) > 0)
- continue;
-
- r = cg_attach(cto, pto, pid);
- if (r < 0) {
- if (ret >= 0 && r != -ESRCH)
- ret = r;
- } else if (ret == 0)
- ret = 1;
-
- done = false;
-
- r = set_put(s, PID_TO_PTR(pid));
- if (r < 0) {
- if (ret >= 0)
- return r;
-
- return ret;
- }
- }
-
- if (r < 0) {
- if (ret >= 0)
- return r;
-
- return ret;
- }
- } while (!done);
-
- return ret;
-}
-
-int cg_migrate_recursive(
- const char *cfrom,
- const char *pfrom,
- const char *cto,
- const char *pto,
- CGroupFlags flags) {
-
- _cleanup_closedir_ DIR *d = NULL;
- int r, ret = 0;
- char *fn;
-
- assert(cfrom);
- assert(pfrom);
- assert(cto);
- assert(pto);
-
- ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
-
- r = cg_enumerate_subgroups(cfrom, pfrom, &d);
- if (r < 0) {
- if (ret >= 0 && r != -ENOENT)
- return r;
-
- return ret;
- }
-
- while ((r = cg_read_subgroup(d, &fn)) > 0) {
- _cleanup_free_ char *p = NULL;
-
- p = path_join(empty_to_root(pfrom), fn);
- free(fn);
- if (!p)
- return -ENOMEM;
-
- r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
- if (r != 0 && ret >= 0)
- ret = r;
- }
-
- if (r < 0 && ret >= 0)
- ret = r;
-
- if (flags & CGROUP_REMOVE) {
- r = cg_rmdir(cfrom, pfrom);
- if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
- return r;
- }
-
- return ret;
-}
-
-int cg_migrate_recursive_fallback(
- const char *cfrom,
- const char *pfrom,
- const char *cto,
- const char *pto,
- CGroupFlags flags) {
-
- int r;
-
- assert(cfrom);
- assert(pfrom);
- assert(cto);
- assert(pto);
-
- r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
- if (r < 0) {
- char prefix[strlen(pto) + 1];
-
- /* This didn't work? Then let's try all prefixes of the destination */
-
- PATH_FOREACH_PREFIX(prefix, pto) {
- int q;
-
- q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
- if (q >= 0)
- return q;
- }
- }
-
- return r;
-}
-
static const char *controller_to_dirname(const char *controller) {
const char *e;
@@ -742,253 +574,6 @@ int cg_get_path_and_check(const char *controller, const char *path, const char *
return cg_get_path(controller, path, suffix, fs);
}
-static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
- assert(path);
- assert(sb);
- assert(ftwbuf);
-
- if (typeflag != FTW_DP)
- return 0;
-
- if (ftwbuf->level < 1)
- return 0;
-
- (void) rmdir(path);
- return 0;
-}
-
-int cg_trim(const char *controller, const char *path, bool delete_root) {
- _cleanup_free_ char *fs = NULL;
- int r = 0, q;
-
- assert(path);
-
- r = cg_get_path(controller, path, NULL, &fs);
- if (r < 0)
- return r;
-
- errno = 0;
- if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
- if (errno == ENOENT)
- r = 0;
- else
- r = errno_or_else(EIO);
- }
-
- if (delete_root) {
- if (rmdir(fs) < 0 && errno != ENOENT)
- return -errno;
- }
-
- q = cg_hybrid_unified();
- if (q < 0)
- return q;
- if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
- if (q < 0)
- log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
- }
-
- return r;
-}
-
-/* Create a cgroup in the hierarchy of controller.
- * Returns 0 if the group already existed, 1 on success, negative otherwise.
- */
-int cg_create(const char *controller, const char *path) {
- _cleanup_free_ char *fs = NULL;
- int r;
-
- r = cg_get_path_and_check(controller, path, NULL, &fs);
- if (r < 0)
- return r;
-
- r = mkdir_parents(fs, 0755);
- if (r < 0)
- return r;
-
- r = mkdir_errno_wrapper(fs, 0755);
- if (r == -EEXIST)
- return 0;
- if (r < 0)
- return r;
-
- r = cg_hybrid_unified();
- if (r < 0)
- return r;
-
- if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
- if (r < 0)
- log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
- }
-
- return 1;
-}
-
-int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
- int r, q;
-
- assert(pid >= 0);
-
- r = cg_create(controller, path);
- if (r < 0)
- return r;
-
- q = cg_attach(controller, path, pid);
- if (q < 0)
- return q;
-
- /* This does not remove the cgroup on failure */
- return r;
-}
-
-int cg_attach(const char *controller, const char *path, pid_t pid) {
- _cleanup_free_ char *fs = NULL;
- char c[DECIMAL_STR_MAX(pid_t) + 2];
- int r;
-
- assert(path);
- assert(pid >= 0);
-
- r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
- if (r < 0)
- return r;
-
- if (pid == 0)
- pid = getpid_cached();
-
- xsprintf(c, PID_FMT "\n", pid);
-
- r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
- if (r < 0)
- return r;
-
- r = cg_hybrid_unified();
- if (r < 0)
- return r;
-
- if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
- if (r < 0)
- log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
- }
-
- return 0;
-}
-
-int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
- int r;
-
- assert(controller);
- assert(path);
- assert(pid >= 0);
-
- r = cg_attach(controller, path, pid);
- if (r < 0) {
- char prefix[strlen(path) + 1];
-
- /* This didn't work? Then let's try all prefixes of
- * the destination */
-
- PATH_FOREACH_PREFIX(prefix, path) {
- int q;
-
- q = cg_attach(controller, prefix, pid);
- if (q >= 0)
- return q;
- }
- }
-
- return r;
-}
-
-int cg_set_access(
- const char *controller,
- const char *path,
- uid_t uid,
- gid_t gid) {
-
- struct Attribute {
- const char *name;
- bool fatal;
- };
-
- /* cgroup v1, aka legacy/non-unified */
- static const struct Attribute legacy_attributes[] = {
- { "cgroup.procs", true },
- { "tasks", false },
- { "cgroup.clone_children", false },
- {},
- };
-
- /* cgroup v2, aka unified */
- static const struct Attribute unified_attributes[] = {
- { "cgroup.procs", true },
- { "cgroup.subtree_control", true },
- { "cgroup.threads", false },
- {},
- };
-
- static const struct Attribute* const attributes[] = {
- [false] = legacy_attributes,
- [true] = unified_attributes,
- };
-
- _cleanup_free_ char *fs = NULL;
- const struct Attribute *i;
- int r, unified;
-
- assert(path);
-
- if (uid == UID_INVALID && gid == GID_INVALID)
- return 0;
-
- unified = cg_unified_controller(controller);
- if (unified < 0)
- return unified;
-
- /* Configure access to the cgroup itself */
- r = cg_get_path(controller, path, NULL, &fs);
- if (r < 0)
- return r;
-
- r = chmod_and_chown(fs, 0755, uid, gid);
- if (r < 0)
- return r;
-
- /* Configure access to the cgroup's attributes */
- for (i = attributes[unified]; i->name; i++) {
- fs = mfree(fs);
-
- r = cg_get_path(controller, path, i->name, &fs);
- if (r < 0)
- return r;
-
- r = chmod_and_chown(fs, 0644, uid, gid);
- if (r < 0) {
- if (i->fatal)
- return r;
-
- log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
- }
- }
-
- if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
- r = cg_hybrid_unified();
- if (r < 0)
- return r;
- if (r > 0) {
- /* Always propagate access mode from unified to legacy controller */
- r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
- if (r < 0)
- log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
- }
- }
-
- return 0;
-}
-
int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
_cleanup_free_ char *fs = NULL;
int r;
@@ -2143,194 +1728,6 @@ fail:
done:
memcpy(ret_values, v, sizeof(char*) * n);
return 0;
-
-}
-
-int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
- CGroupController c;
- CGroupMask done;
- bool created;
- int r;
-
- /* This one will create a cgroup in our private tree, but also
- * duplicate it in the trees specified in mask, and remove it
- * in all others.
- *
- * Returns 0 if the group already existed in the systemd hierarchy,
- * 1 on success, negative otherwise.
- */
-
- /* First create the cgroup in our own hierarchy. */
- r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
- if (r < 0)
- return r;
- created = r;
-
- /* If we are in the unified hierarchy, we are done now */
- r = cg_all_unified();
- if (r < 0)
- return r;
- if (r > 0)
- return created;
-
- supported &= CGROUP_MASK_V1;
- mask = CGROUP_MASK_EXTEND_JOINED(mask);
- done = 0;
-
- /* Otherwise, do the same in the other hierarchies */
- for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
- CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
- const char *n;
-
- if (!FLAGS_SET(supported, bit))
- continue;
-
- if (FLAGS_SET(done, bit))
- continue;
-
- n = cgroup_controller_to_string(c);
- if (FLAGS_SET(mask, bit))
- (void) cg_create(n, path);
- else
- (void) cg_trim(n, path, true);
-
- done |= CGROUP_MASK_EXTEND_JOINED(bit);
- }
-
- return created;
-}
-
-int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
- CGroupController c;
- CGroupMask done;
- int r;
-
- r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
- if (r < 0)
- return r;
-
- r = cg_all_unified();
- if (r < 0)
- return r;
- if (r > 0)
- return 0;
-
- supported &= CGROUP_MASK_V1;
- done = 0;
-
- for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
- CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
- const char *p = NULL;
-
- if (!FLAGS_SET(supported, bit))
- continue;
-
- if (FLAGS_SET(done, bit))
- continue;
-
- if (path_callback)
- p = path_callback(bit, userdata);
- if (!p)
- p = path;
-
- (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
- done |= CGROUP_MASK_EXTEND_JOINED(bit);
- }
-
- return 0;
-}
-
-int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
- Iterator i;
- void *pidp;
- int r = 0;
-
- SET_FOREACH(pidp, pids, i) {
- pid_t pid = PTR_TO_PID(pidp);
- int q;
-
- q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
- if (q < 0 && r >= 0)
- r = q;
- }
-
- return r;
-}
-
-int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
- CGroupController c;
- CGroupMask done;
- int r = 0, q;
-
- if (!path_equal(from, to)) {
- r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
- if (r < 0)
- return r;
- }
-
- q = cg_all_unified();
- if (q < 0)
- return q;
- if (q > 0)
- return r;
-
- supported &= CGROUP_MASK_V1;
- done = 0;
-
- for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
- CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
- const char *p = NULL;
-
- if (!FLAGS_SET(supported, bit))
- continue;
-
- if (FLAGS_SET(done, bit))
- continue;
-
- if (to_callback)
- p = to_callback(bit, userdata);
- if (!p)
- p = to;
-
- (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
- done |= CGROUP_MASK_EXTEND_JOINED(bit);
- }
-
- return r;
-}
-
-int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
- CGroupController c;
- CGroupMask done;
- int r, q;
-
- r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
- if (r < 0)
- return r;
-
- q = cg_all_unified();
- if (q < 0)
- return q;
- if (q > 0)
- return r;
-
- supported &= CGROUP_MASK_V1;
- done = 0;
-
- for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
- CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
-
- if (!FLAGS_SET(supported, bit))
- continue;
-
- if (FLAGS_SET(done, bit))
- continue;
-
- (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
- done |= CGROUP_MASK_EXTEND_JOINED(bit);
- }
-
- return r;
}
int cg_mask_to_string(CGroupMask mask, char **ret) {
@@ -2626,209 +2023,6 @@ int cg_hybrid_unified(void) {
return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
}
-int cg_enable_everywhere(
- CGroupMask supported,
- CGroupMask mask,
- const char *p,
- CGroupMask *ret_result_mask) {
-
- _cleanup_fclose_ FILE *f = NULL;
- _cleanup_free_ char *fs = NULL;
- CGroupController c;
- CGroupMask ret = 0;
- int r;
-
- assert(p);
-
- if (supported == 0) {
- if (ret_result_mask)
- *ret_result_mask = 0;
- return 0;
- }
-
- r = cg_all_unified();
- if (r < 0)
- return r;
- if (r == 0) {
- /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
- * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
- * caller tends to use the returned mask later on to compare if all controllers where properly joined,
- * and if not requeues realization. This use is the primary purpose of the return value, hence let's
- * minimize surprises here and reduce triggers for re-realization by always saying we fully
- * succeeded.) */
- if (ret_result_mask)
- *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
- * CGROUP_MASK_V2: The 'supported' mask
- * might contain pure-V1 or BPF
- * controllers, and we never want to
- * claim that we could enable those with
- * cgroup.subtree_control */
- return 0;
- }
-
- r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
- if (r < 0)
- return r;
-
- for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
- CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
- const char *n;
-
- if (!FLAGS_SET(CGROUP_MASK_V2, bit))
- continue;
-
- if (!FLAGS_SET(supported, bit))
- continue;
-
- n = cgroup_controller_to_string(c);
- {
- char s[1 + strlen(n) + 1];
-
- s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
- strcpy(s + 1, n);
-
- if (!f) {
- f = fopen(fs, "we");
- if (!f)
- return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
- }
-
- r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
- if (r < 0) {
- log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
- FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
- clearerr(f);
-
- /* If we can't turn off a controller, leave it on in the reported resulting mask. This
- * happens for example when we attempt to turn off a controller up in the tree that is
- * used down in the tree. */
- if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
- * only here, and not follow the same logic
- * for other errors such as EINVAL or
- * EOPNOTSUPP or anything else. That's
- * because EBUSY indicates that the
- * controllers is currently enabled and
- * cannot be disabled because something down
- * the hierarchy is still using it. Any other
- * error most likely means something like "I
- * never heard of this controller" or
- * similar. In the former case it's hence
- * safe to assume the controller is still on
- * after the failed operation, while in the
- * latter case it's safer to assume the
- * controller is unknown and hence certainly
- * not enabled. */
- ret |= bit;
- } else {
- /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
- if (FLAGS_SET(mask, bit))
- ret |= bit;
- }
- }
- }
-
- /* Let's return the precise set of controllers now enabled for the cgroup. */
- if (ret_result_mask)
- *ret_result_mask = ret;
-
- return 0;
-}
-
-bool cg_is_unified_wanted(void) {
- static thread_local int wanted = -1;
- bool b;
- const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
- _cleanup_free_ char *c = NULL;
- int r;
-
- /* If we have a cached value, return that. */
- if (wanted >= 0)
- return wanted;
-
- /* If the hierarchy is already mounted, then follow whatever
- * was chosen for it. */
- r = cg_unified_cached(true);
- if (r >= 0)
- return (wanted = r >= CGROUP_UNIFIED_ALL);
-
- /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
- * respect that. */
- r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
- if (r > 0)
- return (wanted = b);
-
- /* If we passed cgroup_no_v1=all with no other instructions, it seems
- * highly unlikely that we want to use hybrid or legacy hierarchy. */
- r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
- if (r > 0 && streq_ptr(c, "all"))
- return (wanted = true);
-
- return (wanted = is_default);
-}
-
-bool cg_is_legacy_wanted(void) {
- static thread_local int wanted = -1;
-
- /* If we have a cached value, return that. */
- if (wanted >= 0)
- return wanted;
-
- /* Check if we have cgroup v2 already mounted. */
- if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
- return (wanted = false);
-
- /* Otherwise, assume that at least partial legacy is wanted,
- * since cgroup v2 should already be mounted at this point. */
- return (wanted = true);
-}
-
-bool cg_is_hybrid_wanted(void) {
- static thread_local int wanted = -1;
- int r;
- bool b;
- const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
- /* We default to true if the default is "hybrid", obviously,
- * but also when the default is "unified", because if we get
- * called, it means that unified hierarchy was not mounted. */
-
- /* If we have a cached value, return that. */
- if (wanted >= 0)
- return wanted;
-
- /* If the hierarchy is already mounted, then follow whatever
- * was chosen for it. */
- if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
- return (wanted = false);
-
- /* Otherwise, let's see what the kernel command line has to say.
- * Since checking is expensive, cache a non-error result. */
- r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
-
- /* The meaning of the kernel option is reversed wrt. to the return value
- * of this function, hence the negation. */
- return (wanted = r > 0 ? !b : is_default);
-}
-
-int cg_weight_parse(const char *s, uint64_t *ret) {
- uint64_t u;
- int r;
-
- if (isempty(s)) {
- *ret = CGROUP_WEIGHT_INVALID;
- return 0;
- }
-
- r = safe_atou64(s, &u);
- if (r < 0)
- return r;
-
- if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
- return -ERANGE;
-
- *ret = u;
- return 0;
-}
-
const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
[CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX,
[CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX,
@@ -2845,46 +2039,6 @@ static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] =
DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
-int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
- uint64_t u;
- int r;
-
- if (isempty(s)) {
- *ret = CGROUP_CPU_SHARES_INVALID;
- return 0;
- }
-
- r = safe_atou64(s, &u);
- if (r < 0)
- return r;
-
- if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
- return -ERANGE;
-
- *ret = u;
- return 0;
-}
-
-int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
- uint64_t u;
- int r;
-
- if (isempty(s)) {
- *ret = CGROUP_BLKIO_WEIGHT_INVALID;
- return 0;
- }
-
- r = safe_atou64(s, &u);
- if (r < 0)
- return r;
-
- if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
- return -ERANGE;
-
- *ret = u;
- return 0;
-}
-
bool is_cgroup_fs(const struct statfs *s) {
return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
is_fs_type(s, CGROUP2_SUPER_MAGIC);
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index ba8df8139d..1472265204 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -172,10 +172,6 @@ typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
-int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
-int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
-int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
-
int cg_split_spec(const char *spec, char **controller, char **path);
int cg_mangle_path(const char *path, char **result);
@@ -184,15 +180,8 @@ int cg_get_path_and_check(const char *controller, const char *path, const char *
int cg_pid_get_path(const char *controller, pid_t pid, char **path);
-int cg_trim(const char *controller, const char *path, bool delete_root);
-
int cg_rmdir(const char *controller, const char *path);
-int cg_create(const char *controller, const char *path);
-int cg_attach(const char *controller, const char *path, pid_t pid);
-int cg_attach_fallback(const char *controller, const char *path, pid_t pid);
-int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
-
int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
@@ -240,13 +229,6 @@ int cg_slice_to_path(const char *unit, char **ret);
typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
-int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path);
-int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
-int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
-int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
-int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
-int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
-
int cg_mask_supported(CGroupMask *ret);
int cg_mask_from_string(const char *s, CGroupMask *ret);
int cg_mask_to_string(CGroupMask mask, char **ret);
@@ -263,16 +245,8 @@ static inline int cg_unified(void) {
return cg_unified_cached(true);
}
-bool cg_is_unified_wanted(void);
-bool cg_is_legacy_wanted(void);
-bool cg_is_hybrid_wanted(void);
-
const char* cgroup_controller_to_string(CGroupController c) _const_;
CGroupController cgroup_controller_from_string(const char *s) _pure_;
-int cg_weight_parse(const char *s, uint64_t *ret);
-int cg_cpu_shares_parse(const char *s, uint64_t *ret);
-int cg_blkio_weight_parse(const char *s, uint64_t *ret);
-
bool is_cgroup_fs(const struct statfs *s);
bool fd_is_cgroup_fs(int fd);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7b0a41fbc8..00690920cd 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -11,6 +11,7 @@
#include "bpf-firewall.h"
#include "btrfs-util.h"
#include "bus-error.h"
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "cgroup.h"
#include "fd-util.h"
diff --git a/src/core/execute.c b/src/core/execute.c
index 4c90007778..f769d02be4 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -48,6 +48,7 @@
#include "cap-list.h"
#include "capability-util.h"
#include "chown-recursive.h"
+#include "cgroup-setup.h"
#include "cpu-set-util.h"
#include "def.h"
#include "env-file.h"
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index a7951a5757..e517c8dfbf 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -24,7 +24,7 @@
#include "bus-util.h"
#include "cap-list.h"
#include "capability-util.h"
-#include "cgroup.h"
+#include "cgroup-setup.h"
#include "conf-parser.h"
#include "cpu-set-util.h"
#include "env-util.h"
diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c
index 72cf5a2caa..790f1e234e 100644
--- a/src/core/mount-setup.c
+++ b/src/core/mount-setup.c
@@ -11,6 +11,7 @@
#include "bus-util.h"
#include "cgroup-util.h"
#include "conf-files.h"
+#include "cgroup-setup.h"
#include "dev-setup.h"
#include "efivars.h"
#include "fd-util.h"
diff --git a/src/core/unit.c b/src/core/unit.c
index 52a1acafab..004d787d8f 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -15,6 +15,7 @@
#include "bpf-firewall.h"
#include "bus-common-errors.h"
#include "bus-util.h"
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "dbus-unit.h"
#include "dbus.h"
diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c
index 3f762cbbc3..766d651c3f 100644
--- a/src/login/pam_systemd.c
+++ b/src/login/pam_systemd.c
@@ -20,7 +20,7 @@
#include "bus-error.h"
#include "bus-internal.h"
#include "bus-util.h"
-#include "cgroup-util.h"
+#include "cgroup-setup.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c
index 0462b46413..f5048d9473 100644
--- a/src/nspawn/nspawn-cgroup.c
+++ b/src/nspawn/nspawn-cgroup.c
@@ -3,6 +3,7 @@
#include <sys/mount.h>
#include "alloc-util.h"
+#include "cgroup-setup.h"
#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index e53b9d5ea2..286ef89878 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -5,6 +5,7 @@
#include "bus-unit-util.h"
#include "bus-util.h"
#include "cap-list.h"
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "condition.h"
#include "cpu-set-util.h"
diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c
new file mode 100644
index 0000000000..ddcd156801
--- /dev/null
+++ b/src/shared/cgroup-setup.c
@@ -0,0 +1,860 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include <ftw.h>
+#include <unistd.h>
+
+#include "cgroup-setup.h"
+#include "cgroup-util.h"
+#include "errno-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "proc-cmdline.h"
+#include "stdio-util.h"
+#include "string-util.h"
+#include "fs-util.h"
+#include "mkdir.h"
+#include "process-util.h"
+#include "fileio.h"
+#include "user-util.h"
+#include "fd-util.h"
+
+/* Tells whether the fully unified cgroup hierarchy shall be used.
+ *
+ * The answer is computed at most once per thread and afterwards served from a thread-local cache.
+ * Sources are consulted in this order:
+ *   1. the hierarchy that is already mounted, as reported by cg_unified_cached(),
+ *   2. systemd.unified_cgroup_hierarchy= on the kernel command line,
+ *   3. cgroup_no_v1=all on the kernel command line (hybrid or legacy is highly unlikely to be wanted
+ *      in that case),
+ *   4. the compile-time DEFAULT_HIERARCHY. */
+bool cg_is_unified_wanted(void) {
+        static thread_local int cached = -1;
+        _cleanup_free_ char *no_v1 = NULL;
+        bool requested;
+        int k;
+
+        if (cached < 0) {
+                k = cg_unified_cached(true);
+                if (k >= 0)
+                        /* Something is mounted already: follow whatever was chosen for it. */
+                        cached = k >= CGROUP_UNIFIED_ALL;
+                else if (proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &requested) > 0)
+                        /* Explicit request on the kernel command line. */
+                        cached = requested;
+                else if (proc_cmdline_get_key("cgroup_no_v1", 0, &no_v1) > 0 && streq_ptr(no_v1, "all"))
+                        cached = true;
+                else
+                        cached = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+        }
+
+        return cached;
+}
+
+/* Tells whether (at least partial) legacy cgroup support is wanted.
+ *
+ * This is false only if cg_unified_cached() reports CGROUP_UNIFIED_ALL, i.e. cgroup v2 is already
+ * mounted. In every other case legacy is assumed to be wanted, since cgroup v2 should already be
+ * mounted at this point. The answer is cached per thread after the first call. */
+bool cg_is_legacy_wanted(void) {
+        static thread_local int cached = -1;
+
+        if (cached < 0)
+                cached = cg_unified_cached(true) != CGROUP_UNIFIED_ALL;
+
+        return cached;
+}
+
+/* Tells whether the hybrid cgroup setup is wanted. The answer is cached per thread after the first call.
+ *
+ * If the fully unified hierarchy is already mounted the answer is "no". Otherwise the kernel command
+ * line option systemd.legacy_systemd_cgroup_controller= decides; its meaning is the reverse of this
+ * function's return value, hence the negation below. Without that option we default to true if the
+ * default is "hybrid", obviously, but also when the default is "unified", because if we get called it
+ * means that the unified hierarchy was not mounted. */
+bool cg_is_hybrid_wanted(void) {
+        static thread_local int cached = -1;
+        bool legacy_controller;
+
+        if (cached >= 0)
+                return cached;
+
+        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
+                cached = false;
+        else if (proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &legacy_controller) > 0)
+                cached = !legacy_controller;
+        else
+                cached = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
+
+        return cached;
+}
+
+/* Parses a cgroup weight from the string s.
+ *
+ * An empty string is accepted and yields CGROUP_WEIGHT_INVALID. Anything else has to be parsable by
+ * safe_atou64() and has to lie within CGROUP_WEIGHT_MIN..CGROUP_WEIGHT_MAX (both inclusive).
+ *
+ * Returns 0 on success (with *ret set), the error returned by safe_atou64() if parsing fails, or
+ * -ERANGE if the value is out of range. *ret is not written on failure. */
+int cg_weight_parse(const char *s, uint64_t *ret) {
+        uint64_t weight = CGROUP_WEIGHT_INVALID;
+
+        if (!isempty(s)) {
+                int k;
+
+                k = safe_atou64(s, &weight);
+                if (k < 0)
+                        return k;
+
+                if (!(weight >= CGROUP_WEIGHT_MIN && weight <= CGROUP_WEIGHT_MAX))
+                        return -ERANGE;
+        }
+
+        *ret = weight;
+        return 0;
+}
+
+/* Parses a CPU shares value from the string s.
+ *
+ * An empty string is accepted and yields CGROUP_CPU_SHARES_INVALID. Anything else has to be parsable
+ * by safe_atou64() and has to lie within CGROUP_CPU_SHARES_MIN..CGROUP_CPU_SHARES_MAX (both inclusive).
+ *
+ * Returns 0 on success (with *ret set), the error returned by safe_atou64() if parsing fails, or
+ * -ERANGE if the value is out of range. *ret is not written on failure. */
+int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
+        uint64_t shares = CGROUP_CPU_SHARES_INVALID;
+
+        if (!isempty(s)) {
+                int k;
+
+                k = safe_atou64(s, &shares);
+                if (k < 0)
+                        return k;
+
+                if (!(shares >= CGROUP_CPU_SHARES_MIN && shares <= CGROUP_CPU_SHARES_MAX))
+                        return -ERANGE;
+        }
+
+        *ret = shares;
+        return 0;
+}
+
+/* Parses a block IO weight from the string s.
+ *
+ * An empty string is accepted and yields CGROUP_BLKIO_WEIGHT_INVALID. Anything else has to be parsable
+ * by safe_atou64() and has to lie within CGROUP_BLKIO_WEIGHT_MIN..CGROUP_BLKIO_WEIGHT_MAX (both
+ * inclusive).
+ *
+ * Returns 0 on success (with *ret set), the error returned by safe_atou64() if parsing fails, or
+ * -ERANGE if the value is out of range. *ret is not written on failure. */
+int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
+        uint64_t weight = CGROUP_BLKIO_WEIGHT_INVALID;
+
+        if (!isempty(s)) {
+                int k;
+
+                k = safe_atou64(s, &weight);
+                if (k < 0)
+                        return k;
+
+                if (!(weight >= CGROUP_BLKIO_WEIGHT_MIN && weight <= CGROUP_BLKIO_WEIGHT_MAX))
+                        return -ERANGE;
+        }
+
+        *ret = weight;
+        return 0;
+}
+
+
+/* nftw() callback used by cg_trim(): tries to remove every directory that is reported with FTW_DP and
+ * that lies below the start of the walk (level >= 1). The result of rmdir() is deliberately ignored,
+ * and 0 is always returned so that the walk continues. */
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+        assert(path);
+        assert(sb);
+        assert(ftwbuf);
+
+        if (typeflag == FTW_DP && ftwbuf->level >= 1)
+                (void) rmdir(path);
+
+        return 0;
+}
+
+/* Removes the sub-cgroups below cgroup 'path' in the hierarchy of 'controller', and, if delete_root is
+ * true, the cgroup 'path' itself too.
+ *
+ * The directory tree is walked with nftw() and trim_cb() calls rmdir() on every directory below the
+ * root; individual rmdir() failures during the walk are ignored. If the systemd hierarchy is trimmed on
+ * a hybrid setup, the same is attempted for the legacy compat hierarchy, where failure is only logged.
+ *
+ * Returns a negative errno-style value if the path cannot be determined, if the walk fails for a reason
+ * other than ENOENT, if removing the root fails for a reason other than ENOENT, or if
+ * cg_hybrid_unified() fails. */
+int cg_trim(const char *controller, const char *path, bool delete_root) {
+        _cleanup_free_ char *fs = NULL;
+        int r, q;
+
+        assert(path);
+
+        r = cg_get_path(controller, path, NULL, &fs);
+        if (r < 0)
+                return r;
+
+        /* nftw() does not clearly define errno on failure, hence reset it first and let errno_or_else()
+         * pick a fallback if it is still unset afterwards. */
+        errno = 0;
+        if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
+                if (errno == ENOENT)
+                        r = 0;
+                else
+                        r = errno_or_else(EIO);
+        }
+
+        if (delete_root) {
+                if (rmdir(fs) < 0 && errno != ENOENT)
+                        return -errno;
+        }
+
+        q = cg_hybrid_unified();
+        if (q < 0)
+                return q;
+
+        /* Only 'path' is asserted above, so compare the controller in a NULL-safe way.
+         * NOTE(review): whether cg_get_path() accepts a NULL controller is not visible here; confirm. */
+        if (q > 0 && streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+                q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
+                if (q < 0)
+                        log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
+        }
+
+        return r;
+}
+
+/* Creates the cgroup 'path' (including missing parent directories) in the hierarchy of 'controller'.
+ *
+ * If the systemd hierarchy is targeted on a hybrid setup, the cgroup is additionally created in the
+ * legacy compat hierarchy; a failure there is only logged.
+ *
+ * Returns 0 if the cgroup already existed, 1 if it was created, negative on error. */
+int cg_create(const char *controller, const char *path) {
+        _cleanup_free_ char *dir = NULL;
+        int k;
+
+        k = cg_get_path_and_check(controller, path, NULL, &dir);
+        if (k < 0)
+                return k;
+
+        k = mkdir_parents(dir, 0755);
+        if (k < 0)
+                return k;
+
+        k = mkdir_errno_wrapper(dir, 0755);
+        if (k < 0)
+                return k == -EEXIST ? 0 : k;
+
+        k = cg_hybrid_unified();
+        if (k < 0)
+                return k;
+
+        if (k > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+                int q;
+
+                q = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
+                if (q < 0)
+                        log_warning_errno(q, "Failed to create compat systemd cgroup %s: %m", path);
+        }
+
+        return 1;
+}
+
+/* Creates the cgroup via cg_create() and then attaches 'pid' to it via cg_attach().
+ *
+ * Returns the result of cg_create() (0 or 1) on success, or the first negative error of either step.
+ * The cgroup is not removed again if attaching fails. */
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
+        int created, k;
+
+        assert(pid >= 0);
+
+        created = cg_create(controller, path);
+        if (created < 0)
+                return created;
+
+        k = cg_attach(controller, path, pid);
+
+        return k < 0 ? k : created;
+}
+
+/* Attaches process 'pid' to the cgroup 'path' in the hierarchy of 'controller' by writing the PID to the
+ * cgroup's "cgroup.procs" attribute. A pid of 0 refers to the calling process.
+ *
+ * If the systemd hierarchy is targeted on a hybrid setup, the process is additionally attached in the
+ * legacy compat hierarchy; a failure there is only logged.
+ *
+ * Returns 0 on success, negative on error. */
+int cg_attach(const char *controller, const char *path, pid_t pid) {
+        _cleanup_free_ char *procs = NULL;
+        char line[DECIMAL_STR_MAX(pid_t) + 2];
+        int k;
+
+        assert(path);
+        assert(pid >= 0);
+
+        k = cg_get_path_and_check(controller, path, "cgroup.procs", &procs);
+        if (k < 0)
+                return k;
+
+        if (pid == 0)
+                pid = getpid_cached();
+
+        xsprintf(line, PID_FMT "\n", pid);
+
+        k = write_string_file(procs, line, WRITE_STRING_FILE_DISABLE_BUFFER);
+        if (k < 0)
+                return k;
+
+        k = cg_hybrid_unified();
+        if (k < 0)
+                return k;
+
+        if (k > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
+                int q;
+
+                q = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
+                if (q < 0)
+                        log_warning_errno(q, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
+        }
+
+        return 0;
+}
+
+/* Like cg_attach(), but if attaching to 'path' fails, the prefixes of 'path' are tried one after the
+ * other (as enumerated by PATH_FOREACH_PREFIX) until one attach succeeds.
+ *
+ * Returns the result of the first successful attach, or the error of the initial attempt if none of
+ * the attempts succeeded. */
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
+        int first;
+
+        assert(controller);
+        assert(path);
+        assert(pid >= 0);
+
+        first = cg_attach(controller, path, pid);
+        if (first < 0) {
+                char ancestor[strlen(path) + 1];
+
+                /* The exact destination did not work, hence walk through its prefixes. */
+                PATH_FOREACH_PREFIX(ancestor, path) {
+                        int k;
+
+                        k = cg_attach(controller, ancestor, pid);
+                        if (k >= 0)
+                                return k;
+                }
+        }
+
+        return first;
+}
+
+/* Hands the cgroup 'path' in the hierarchy of 'controller' over to uid/gid.
+ *
+ * The cgroup directory itself gets mode 0755, and a fixed list of attribute files gets mode 0644. Which
+ * list is used depends on whether cg_unified_controller() reports the controller as unified. A failure
+ * on an attribute marked fatal aborts the operation, failures on the others are only logged at debug
+ * level. If the systemd hierarchy is targeted on a hybrid setup, the same is attempted for the legacy
+ * compat hierarchy, where failure is only logged.
+ *
+ * Nothing is done, and 0 is returned, if both uid and gid are invalid. Returns 0 on success, negative
+ * on error. */
+int cg_set_access(
+                const char *controller,
+                const char *path,
+                uid_t uid,
+                gid_t gid) {
+
+        struct Attribute {
+                const char *name;
+                bool fatal;
+        };
+
+        /* cgroup v1, aka legacy/non-unified */
+        static const struct Attribute legacy_attributes[] = {
+                { "cgroup.procs", true },
+                { "tasks", false },
+                { "cgroup.clone_children", false },
+                {},
+        };
+
+        /* cgroup v2, aka unified */
+        static const struct Attribute unified_attributes[] = {
+                { "cgroup.procs", true },
+                { "cgroup.subtree_control", true },
+                { "cgroup.threads", false },
+                {},
+        };
+
+        /* Indexed by the (boolean) result of cg_unified_controller() */
+        static const struct Attribute* const attributes[] = {
+                [false] = legacy_attributes,
+                [true] = unified_attributes,
+        };
+
+        _cleanup_free_ char *fs = NULL;
+        const struct Attribute *attr;
+        int k, unified;
+
+        assert(path);
+
+        if (uid == UID_INVALID && gid == GID_INVALID)
+                return 0;
+
+        unified = cg_unified_controller(controller);
+        if (unified < 0)
+                return unified;
+
+        /* First the cgroup directory itself ... */
+        k = cg_get_path(controller, path, NULL, &fs);
+        if (k < 0)
+                return k;
+
+        k = chmod_and_chown(fs, 0755, uid, gid);
+        if (k < 0)
+                return k;
+
+        /* ... then each of its attributes, up to the terminating entry without a name. */
+        attr = attributes[unified];
+        while (attr->name) {
+                fs = mfree(fs);
+
+                k = cg_get_path(controller, path, attr->name, &fs);
+                if (k < 0)
+                        return k;
+
+                k = chmod_and_chown(fs, 0644, uid, gid);
+                if (k < 0 && attr->fatal)
+                        return k;
+                if (k < 0)
+                        log_debug_errno(k, "Failed to set access on cgroup %s, ignoring: %m", fs);
+
+                attr++;
+        }
+
+        if (!streq(controller, SYSTEMD_CGROUP_CONTROLLER))
+                return 0;
+
+        k = cg_hybrid_unified();
+        if (k < 0)
+                return k;
+        if (k > 0) {
+                /* Always propagate access mode from unified to legacy controller */
+                k = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
+                if (k < 0)
+                        log_debug_errno(k, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
+        }
+
+        return 0;
+}
+
+int cg_migrate(
+ const char *cfrom,
+ const char *pfrom,
+ const char *cto,
+ const char *pto,
+ CGroupFlags flags) { /* Attaches every process enumerated in cgroup pfrom (hierarchy cfrom) to cgroup pto
+ * (hierarchy cto) via cg_attach(), honoring CGROUP_IGNORE_SELF in flags.
+ * Returns 1 if at least one process was attached, 0 if none was, and a
+ * negative errno-style value on failure; -ESRCH from cg_attach() and -ENOENT
+ * from cg_enumerate_processes() are not reported as failures. */
+
+ bool done = false; /* cleared whenever a pass handles a PID it has not seen before */
+ _cleanup_set_free_ Set *s = NULL; /* PIDs that were already handled in an earlier pass */
+ int r, ret = 0; /* ret is the value to report; once it is negative it is never overwritten */
+ pid_t my_pid;
+
+ assert(cfrom);
+ assert(pfrom);
+ assert(cto);
+ assert(pto);
+
+ s = set_new(NULL);
+ if (!s)
+ return -ENOMEM;
+
+ my_pid = getpid_cached();
+
+ do { /* enumerate again and again until a full pass finds no PID that is new to us */
+ _cleanup_fclose_ FILE *f = NULL;
+ pid_t pid = 0;
+ done = true;
+
+ r = cg_enumerate_processes(cfrom, pfrom, &f);
+ if (r < 0) {
+ if (ret >= 0 && r != -ENOENT)
+ return r;
+
+ return ret;
+ }
+
+ while ((r = cg_read_pid(f, &pid)) > 0) {
+
+ /* Skip ourselves if requested. This might do weird
+ * stuff in a multi-threaded program; the original
+ * note here assumes we are single-threaded. */
+ if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
+ continue;
+
+ if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) /* already handled in an earlier pass */
+ continue;
+
+ /* Ignore kernel threads. Since they can only
+ * exist in the root cgroup, we only check for
+ * them there. */
+ if (cfrom &&
+ empty_or_root(pfrom) &&
+ is_kernel_thread(pid) > 0)
+ continue;
+
+ r = cg_attach(cto, pto, pid);
+ if (r < 0) {
+ if (ret >= 0 && r != -ESRCH) /* -ESRCH is not recorded as an error */
+ ret = r;
+ } else if (ret == 0)
+ ret = 1;
+
+ done = false; /* we handled a new PID, hence one more pass is needed */
+
+ r = set_put(s, PID_TO_PTR(pid));
+ if (r < 0) {
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ }
+
+ if (r < 0) { /* cg_read_pid() failed */
+ if (ret >= 0)
+ return r;
+
+ return ret;
+ }
+ } while (!done);
+
+ return ret;
+}
+
+/* Migrates the processes of cgroup pfrom (hierarchy cfrom) and, recursively, of all its sub-cgroups to
+ * the cgroup pto (hierarchy cto). If CGROUP_REMOVE is set in flags, each source cgroup is removed with
+ * cg_rmdir() after its subtree has been processed.
+ *
+ * The result starts out as the return value of cg_migrate() for pfrom itself. While it is not negative
+ * it is replaced by any non-zero result of a sub-cgroup, or by an enumeration error. -ENOENT from
+ * cg_enumerate_subgroups() and -ENOENT/-EBUSY from cg_rmdir() are not treated as failures. */
+int cg_migrate_recursive(
+                const char *cfrom,
+                const char *pfrom,
+                const char *cto,
+                const char *pto,
+                CGroupFlags flags) {
+
+        _cleanup_closedir_ DIR *d = NULL;
+        int k, result = 0;
+        char *name;
+
+        assert(cfrom);
+        assert(pfrom);
+        assert(cto);
+        assert(pto);
+
+        result = cg_migrate(cfrom, pfrom, cto, pto, flags);
+
+        k = cg_enumerate_subgroups(cfrom, pfrom, &d);
+        if (k < 0)
+                return (result >= 0 && k != -ENOENT) ? k : result;
+
+        for (;;) {
+                _cleanup_free_ char *child = NULL;
+                int q;
+
+                k = cg_read_subgroup(d, &name);
+                if (k <= 0)
+                        break;
+
+                child = path_join(empty_to_root(pfrom), name);
+                free(name);
+                if (!child)
+                        return -ENOMEM;
+
+                q = cg_migrate_recursive(cfrom, child, cto, pto, flags);
+                if (q != 0 && result >= 0)
+                        result = q;
+        }
+
+        /* k still holds the last cg_read_subgroup() result here */
+        if (k < 0 && result >= 0)
+                result = k;
+
+        if (flags & CGROUP_REMOVE) {
+                k = cg_rmdir(cfrom, pfrom);
+                if (k < 0 && result >= 0 && !IN_SET(k, -ENOENT, -EBUSY))
+                        return k;
+        }
+
+        return result;
+}
+
+/* Like cg_migrate_recursive(), but if migrating to pto fails, the prefixes of pto are tried as
+ * destination one after the other (as enumerated by PATH_FOREACH_PREFIX) until one attempt succeeds.
+ *
+ * Returns the result of the first successful attempt, or the error of the initial attempt if none
+ * succeeded. */
+int cg_migrate_recursive_fallback(
+                const char *cfrom,
+                const char *pfrom,
+                const char *cto,
+                const char *pto,
+                CGroupFlags flags) {
+
+        int first;
+
+        assert(cfrom);
+        assert(pfrom);
+        assert(cto);
+        assert(pto);
+
+        first = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
+        if (first < 0) {
+                char ancestor[strlen(pto) + 1];
+
+                /* The exact destination did not work, hence walk through its prefixes. */
+                PATH_FOREACH_PREFIX(ancestor, pto) {
+                        int k;
+
+                        k = cg_migrate_recursive(cfrom, pfrom, cto, ancestor, flags);
+                        if (k >= 0)
+                                return k;
+                }
+        }
+
+        return first;
+}
+
+/* Creates the cgroup 'path' in our private (systemd) tree, duplicates it in the trees of the supported
+ * controllers listed in 'mask', and removes it from the trees of all other supported controllers.
+ *
+ * On a fully unified setup only the first step is done. Failures in the per-controller trees are
+ * ignored.
+ *
+ * Returns 0 if the group already existed in the systemd hierarchy, 1 if it was created there, negative
+ * on error. */
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        bool created;
+        int k;
+
+        /* Our own hierarchy comes first. */
+        k = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+        if (k < 0)
+                return k;
+        created = k;
+
+        /* Nothing more to do if everything is unified. */
+        k = cg_all_unified();
+        if (k < 0)
+                return k;
+        if (k > 0)
+                return created;
+
+        supported &= CGROUP_MASK_V1;
+        mask = CGROUP_MASK_EXTEND_JOINED(mask);
+
+        /* Now visit the other hierarchies, skipping controllers already covered via a joined one. */
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+                const char *name;
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                name = cgroup_controller_to_string(ctrl);
+                if (!FLAGS_SET(mask, bit))
+                        (void) cg_trim(name, path, true);
+                else
+                        (void) cg_create(name, path);
+
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return created;
+}
+
+/* Attaches 'pid' to the cgroup 'path' in the systemd hierarchy and, unless everything is unified, also
+ * in the hierarchy of every supported v1 controller.
+ *
+ * For the per-controller hierarchies path_callback (if set) may supply a different destination path; if
+ * it returns NULL, 'path' is used. These attachments go through cg_attach_fallback() and their results
+ * are ignored.
+ *
+ * Returns 0 on success, negative if attaching in the systemd hierarchy or cg_all_unified() fails. */
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        int k;
+
+        k = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
+        if (k < 0)
+                return k;
+
+        k = cg_all_unified();
+        if (k < 0)
+                return k;
+        if (k > 0)
+                return 0;
+
+        supported &= CGROUP_MASK_V1;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+                const char *target;
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                target = path_callback ? path_callback(bit, userdata) : NULL;
+                if (!target)
+                        target = path;
+
+                (void) cg_attach_fallback(cgroup_controller_to_string(ctrl), target, pid);
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return 0;
+}
+
+/* Calls cg_attach_everywhere() for every PID stored in the set 'pids'. All PIDs are processed even if
+ * some of them fail.
+ *
+ * Returns 0 if all calls succeeded, otherwise the first negative error encountered. */
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
+        int first_error = 0;
+        Iterator it;
+        void *entry;
+
+        SET_FOREACH(entry, pids, it) {
+                int k;
+
+                k = cg_attach_everywhere(supported, path, PTR_TO_PID(entry), path_callback, userdata);
+                if (k < 0 && first_error >= 0)
+                        first_error = k;
+        }
+
+        return first_error;
+}
+
+/* Migrates all processes from cgroup 'from' to cgroup 'to' in the systemd hierarchy (removing the source
+ * cgroups, and skipped entirely if both paths are equal). Unless everything is unified, the processes
+ * now found in 'to' are then also migrated into the hierarchy of every supported v1 controller.
+ *
+ * For the per-controller hierarchies to_callback (if set) may supply a different destination path; if
+ * it returns NULL, 'to' is used. These migrations go through cg_migrate_recursive_fallback() and their
+ * results are ignored.
+ *
+ * Returns the result of the migration in the systemd hierarchy (0 if it was skipped), or a negative
+ * error if that migration or cg_all_unified() fails. */
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        int result = 0, q;
+
+        if (!path_equal(from, to)) {
+                result = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
+                if (result < 0)
+                        return result;
+        }
+
+        q = cg_all_unified();
+        if (q < 0)
+                return q;
+        if (q > 0)
+                return result;
+
+        supported &= CGROUP_MASK_V1;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+                const char *target;
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                target = to_callback ? to_callback(bit, userdata) : NULL;
+                if (!target)
+                        target = to;
+
+                (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(ctrl), target, 0);
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return result;
+}
+
+/* Trims the cgroup 'path' with cg_trim() in the systemd hierarchy and, unless everything is unified,
+ * also in the hierarchy of every supported v1 controller. Results for the per-controller hierarchies
+ * are ignored.
+ *
+ * Returns the result of trimming the systemd hierarchy, or a negative error if that or
+ * cg_all_unified() fails. */
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
+        CGroupMask handled = 0;
+        CGroupController ctrl;
+        int result, q;
+
+        result = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
+        if (result < 0)
+                return result;
+
+        q = cg_all_unified();
+        if (q < 0)
+                return q;
+        if (q > 0)
+                return result;
+
+        supported &= CGROUP_MASK_V1;
+
+        for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(ctrl);
+
+                if (!FLAGS_SET(supported, bit) || FLAGS_SET(handled, bit))
+                        continue;
+
+                (void) cg_trim(cgroup_controller_to_string(ctrl), path, delete_root);
+                handled |= CGROUP_MASK_EXTEND_JOINED(bit);
+        }
+
+        return result;
+}
+
+int cg_enable_everywhere(
+ CGroupMask supported,
+ CGroupMask mask,
+ const char *p,
+ CGroupMask *ret_result_mask) { /* Writes "+<name>" (controller in mask) or "-<name>" (controller not in mask) to the
+ * "cgroup.subtree_control" attribute of cgroup p, for every controller that is
+ * both in CGROUP_MASK_V2 and in 'supported'. If ret_result_mask is non-NULL it
+ * receives the mask of controllers considered enabled afterwards. Individual
+ * write failures are only logged; a negative value is returned if
+ * cg_all_unified() or cg_get_path() fails or the attribute cannot be opened. */
+
+ _cleanup_fclose_ FILE *f = NULL; /* opened lazily, right before the first write */
+ _cleanup_free_ char *fs = NULL;
+ CGroupController c;
+ CGroupMask ret = 0;
+ int r;
+
+ assert(p);
+
+ if (supported == 0) {
+ if (ret_result_mask)
+ *ret_result_mask = 0;
+ return 0;
+ }
+
+ r = cg_all_unified();
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+ * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+ * caller tends to use the returned mask later on to compare if all controllers were properly joined,
+ * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+ * minimize surprises here and reduce triggers for re-realization by always saying we fully
+ * succeeded.) */
+ if (ret_result_mask)
+ *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+ * CGROUP_MASK_V2: The 'supported' mask
+ * might contain pure-V1 or BPF
+ * controllers, and we never want to
+ * claim that we could enable those with
+ * cgroup.subtree_control */
+ return 0;
+ }
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
+ if (r < 0)
+ return r;
+
+ for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
+ CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
+ const char *n;
+
+ if (!FLAGS_SET(CGROUP_MASK_V2, bit))
+ continue;
+
+ if (!FLAGS_SET(supported, bit))
+ continue;
+
+ n = cgroup_controller_to_string(c);
+ {
+ char s[1 + strlen(n) + 1]; /* sign character + controller name + NUL */
+
+ s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
+ strcpy(s + 1, n);
+
+ if (!f) {
+ f = fopen(fs, "we");
+ if (!f)
+ return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
+ }
+
+ r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+ FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
+ clearerr(f); /* reset the stream's error indicator before the next write */
+
+ /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+ * happens for example when we attempt to turn off a controller up in the tree that is
+ * used down in the tree. */
+ if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+ * only here, and not follow the same logic
+ * for other errors such as EINVAL or
+ * EOPNOTSUPP or anything else. That's
+ * because EBUSY indicates that the
+ * controller is currently enabled and
+ * cannot be disabled because something down
+ * the hierarchy is still using it. Any other
+ * error most likely means something like "I
+ * never heard of this controller" or
+ * similar. In the former case it's hence
+ * safe to assume the controller is still on
+ * after the failed operation, while in the
+ * latter case it's safer to assume the
+ * controller is unknown and hence certainly
+ * not enabled. */
+ ret |= bit;
+ } else {
+ /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+ if (FLAGS_SET(mask, bit))
+ ret |= bit;
+ }
+ }
+ }
+
+ /* Let's return the precise set of controllers now enabled for the cgroup. */
+ if (ret_result_mask)
+ *ret_result_mask = ret;
+
+ return 0;
+}
diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h
new file mode 100644
index 0000000000..6e9b6857d8
--- /dev/null
+++ b/src/shared/cgroup-setup.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "cgroup-util.h"
+
+bool cg_is_unified_wanted(void); /* the three cg_is_*_wanted() results are cached per thread after the first call */
+bool cg_is_legacy_wanted(void);
+bool cg_is_hybrid_wanted(void);
+
+int cg_weight_parse(const char *s, uint64_t *ret); /* the *_parse() calls: empty string yields the matching *_INVALID value, out of range yields -ERANGE */
+int cg_cpu_shares_parse(const char *s, uint64_t *ret);
+int cg_blkio_weight_parse(const char *s, uint64_t *ret);
+
+int cg_trim(const char *controller, const char *path, bool delete_root); /* removes sub-cgroups, and the cgroup itself if delete_root is set */
+
+int cg_create(const char *controller, const char *path); /* 0 if the cgroup already existed, 1 if it was created, negative on error */
+int cg_attach(const char *controller, const char *path, pid_t pid); /* pid == 0 refers to the calling process */
+int cg_attach_fallback(const char *controller, const char *path, pid_t pid); /* retries with the prefixes of path on failure */
+int cg_create_and_attach(const char *controller, const char *path, pid_t pid); /* does not remove the cgroup if attaching fails */
+
+int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags);
+int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); /* retries with the prefixes of pto on failure */
+
+int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); /* the *_everywhere() calls act on the systemd hierarchy first, then on the supported v1 controllers */
+int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata);
+int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
+int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
+int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); /* writes to cgroup.subtree_control; ret_result_mask may be NULL */
diff --git a/src/shared/meson.build b/src/shared/meson.build
index e9005a30e3..63a3f88d50 100644
--- a/src/shared/meson.build
+++ b/src/shared/meson.build
@@ -33,6 +33,8 @@ shared_sources = files('''
bus-wait-for-units.h
calendarspec.c
calendarspec.h
+ cgroup-setup.c
+ cgroup-setup.h
cgroup-show.c
cgroup-show.h
clean-ipc.c
diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c
index 0eb17989d0..08215fd3ee 100644
--- a/src/shutdown/shutdown.c
+++ b/src/shutdown/shutdown.c
@@ -17,6 +17,7 @@
#include "alloc-util.h"
#include "async.h"
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "def.h"
#include "exec-util.h"
diff --git a/src/test/meson.build b/src/test/meson.build
index a8c3e59098..5132145b41 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -615,6 +615,10 @@ tests += [
[],
[]],
+ [['src/test/test-cgroup-setup.c'],
+ [],
+ []],
+
[['src/test/test-env-file.c'],
[],
[]],
diff --git a/src/test/test-cgroup-setup.c b/src/test/test-cgroup-setup.c
new file mode 100644
index 0000000000..330631a910
--- /dev/null
+++ b/src/test/test-cgroup-setup.c
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "alloc-util.h"
+#include "build.h"
+#include "cgroup-setup.h"
+#include "log.h"
+#include "proc-cmdline.h"
+#include "string-util.h"
+#include "tests.h"
+
+/* Logs the effective kernel command line and the current answers of cg_is_unified_wanted(),
+ * cg_is_hybrid_wanted() and cg_is_legacy_wanted(). If 'header' is true, the compiled-in default
+ * hierarchy and the output of "findmnt -n /sys/fs/cgroup" are shown first. */
+static void test_is_wanted_print(bool header) {
+        _cleanup_free_ char *cmdline = NULL;
+        const char *answer;
+
+        log_info("-- %s --", __func__);
+        assert_se(proc_cmdline(&cmdline) >= 0);
+        log_info("cmdline: %s", cmdline);
+
+        if (header) {
+                log_info(_CGROUP_HIERARCHY_);
+                (void) system("findmnt -n /sys/fs/cgroup");
+        }
+
+        /* Query and report one after the other, so that the order of calls and log lines stays fixed. */
+        answer = yes_no(cg_is_unified_wanted());
+        log_info("is_unified_wanted() → %s", answer);
+
+        answer = yes_no(cg_is_hybrid_wanted());
+        log_info("is_hybrid_wanted() → %s", answer);
+
+        answer = yes_no(cg_is_legacy_wanted());
+        log_info("is_legacy_wanted() → %s", answer);
+
+        log_info(" ");
+}
+
+/* Runs test_is_wanted_print() once for each of a series of kernel command lines, which are injected
+ * through the SYSTEMD_PROC_CMDLINE environment variable. */
+static void test_is_wanted(void) {
+        static const char * const cmdlines[] = {
+                "systemd.unified_cgroup_hierarchy",
+
+                "systemd.unified_cgroup_hierarchy=0",
+
+                "systemd.unified_cgroup_hierarchy=0 "
+                "systemd.legacy_systemd_cgroup_controller",
+
+                "systemd.unified_cgroup_hierarchy=0 "
+                "systemd.legacy_systemd_cgroup_controller=0",
+
+                /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise
+                 * explicitly specified. */
+                "cgroup_no_v1=all",
+
+                "cgroup_no_v1=all "
+                "systemd.unified_cgroup_hierarchy=0",
+        };
+        size_t i;
+
+        for (i = 0; i < sizeof(cmdlines) / sizeof(cmdlines[0]); i++) {
+                assert_se(setenv("SYSTEMD_PROC_CMDLINE", cmdlines[i], 1) >= 0);
+                test_is_wanted_print(false);
+        }
+}
+
+/* Test entry point: prints the "wanted" state twice (the first time with the header, the second time to
+ * test caching) and then runs the kernel command line variations. */
+int main(void) {
+        unsigned pass;
+
+        test_setup_logging(LOG_DEBUG);
+
+        for (pass = 0; pass < 2; pass++)
+                test_is_wanted_print(pass == 0);
+
+        test_is_wanted();
+
+        return 0;
+}
diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c
index f45c5ff760..e4fbb9edce 100644
--- a/src/test/test-cgroup-util.c
+++ b/src/test/test-cgroup-util.c
@@ -333,55 +333,6 @@ static void test_fd_is_cgroup_fs(void) {
fd = safe_close(fd);
}
-static void test_is_wanted_print(bool header) {
- _cleanup_free_ char *cmdline = NULL;
-
- log_info("-- %s --", __func__);
- assert_se(proc_cmdline(&cmdline) >= 0);
- log_info("cmdline: %s", cmdline);
- if (header) {
-
- log_info(_CGROUP_HIERARCHY_);
- (void) system("findmnt -n /sys/fs/cgroup");
- }
-
- log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted()));
- log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted()));
- log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted()));
- log_info(" ");
-}
-
-static void test_is_wanted(void) {
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "systemd.unified_cgroup_hierarchy", 1) >= 0);
- test_is_wanted_print(false);
-
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
- test_is_wanted_print(false);
-
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "systemd.unified_cgroup_hierarchy=0 "
- "systemd.legacy_systemd_cgroup_controller", 1) >= 0);
- test_is_wanted_print(false);
-
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "systemd.unified_cgroup_hierarchy=0 "
- "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0);
- test_is_wanted_print(false);
-
- /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise
- * explicitly specified. */
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "cgroup_no_v1=all", 1) >= 0);
- test_is_wanted_print(false);
-
- assert_se(setenv("SYSTEMD_PROC_CMDLINE",
- "cgroup_no_v1=all "
- "systemd.unified_cgroup_hierarchy=0", 1) >= 0);
- test_is_wanted_print(false);
-}
-
static void test_cg_tests(void) {
int all, hybrid, systemd, r;
@@ -477,9 +428,6 @@ int main(void) {
TEST_REQ_RUNNING_SYSTEMD(test_mask_supported());
TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs());
TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs());
- test_is_wanted_print(true);
- test_is_wanted_print(false); /* run twice to test caching */
- test_is_wanted();
test_cg_tests();
test_cg_get_keyed_attribute();
diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c
index 5cdfd2dc54..1891df0eb9 100644
--- a/src/test/test-cgroup.c
+++ b/src/test/test-cgroup.c
@@ -3,6 +3,7 @@
#include <string.h>
#include <unistd.h>
+#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "path-util.h"
#include "process-util.h"
diff --git a/src/test/test-helper.c b/src/test/test-helper.c
index 5b79d12f07..dc8c80a14b 100644
--- a/src/test/test-helper.c
+++ b/src/test/test-helper.c
@@ -3,7 +3,7 @@
#include "test-helper.h"
#include "random-util.h"
#include "alloc-util.h"
-#include "cgroup-util.h"
+#include "cgroup-setup.h"
#include "string-util.h"
int enter_cgroup_subroot(void) {