diff options
-rw-r--r-- | src/basic/cgroup-util.c | 846 | ||||
-rw-r--r-- | src/basic/cgroup-util.h | 26 | ||||
-rw-r--r-- | src/core/cgroup.c | 1 | ||||
-rw-r--r-- | src/core/execute.c | 1 | ||||
-rw-r--r-- | src/core/load-fragment.c | 2 | ||||
-rw-r--r-- | src/core/mount-setup.c | 1 | ||||
-rw-r--r-- | src/core/unit.c | 1 | ||||
-rw-r--r-- | src/login/pam_systemd.c | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-cgroup.c | 1 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 1 | ||||
-rw-r--r-- | src/shared/cgroup-setup.c | 860 | ||||
-rw-r--r-- | src/shared/cgroup-setup.h | 34 | ||||
-rw-r--r-- | src/shared/meson.build | 2 | ||||
-rw-r--r-- | src/shutdown/shutdown.c | 1 | ||||
-rw-r--r-- | src/test/meson.build | 4 | ||||
-rw-r--r-- | src/test/test-cgroup-setup.c | 67 | ||||
-rw-r--r-- | src/test/test-cgroup-util.c | 52 | ||||
-rw-r--r-- | src/test/test-cgroup.c | 1 | ||||
-rw-r--r-- | src/test/test-helper.c | 2 |
19 files changed, 978 insertions, 927 deletions
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 2865cd518e..6582b92d67 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -31,7 +31,6 @@ #include "mkdir.h" #include "parse-util.h" #include "path-util.h" -#include "proc-cmdline.h" #include "process-util.h" #include "set.h" #include "special.h" @@ -410,173 +409,6 @@ int cg_kill_recursive( return ret; } -int cg_migrate( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - bool done = false; - _cleanup_set_free_ Set *s = NULL; - int r, ret = 0; - pid_t my_pid; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - s = set_new(NULL); - if (!s) - return -ENOMEM; - - my_pid = getpid_cached(); - - do { - _cleanup_fclose_ FILE *f = NULL; - pid_t pid = 0; - done = true; - - r = cg_enumerate_processes(cfrom, pfrom, &f); - if (r < 0) { - if (ret >= 0 && r != -ENOENT) - return r; - - return ret; - } - - while ((r = cg_read_pid(f, &pid)) > 0) { - - /* This might do weird stuff if we aren't a - * single-threaded program. However, we - * luckily know we are not */ - if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) - continue; - - if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) - continue; - - /* Ignore kernel threads. Since they can only - * exist in the root cgroup, we only check for - * them there. */ - if (cfrom && - empty_or_root(pfrom) && - is_kernel_thread(pid) > 0) - continue; - - r = cg_attach(cto, pto, pid); - if (r < 0) { - if (ret >= 0 && r != -ESRCH) - ret = r; - } else if (ret == 0) - ret = 1; - - done = false; - - r = set_put(s, PID_TO_PTR(pid)); - if (r < 0) { - if (ret >= 0) - return r; - - return ret; - } - } - - if (r < 0) { - if (ret >= 0) - return r; - - return ret; - } - } while (!done); - - return ret; -} - -int cg_migrate_recursive( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - _cleanup_closedir_ DIR *d = NULL; - int r, ret = 0; - char *fn; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - ret = cg_migrate(cfrom, pfrom, cto, pto, flags); - - r = cg_enumerate_subgroups(cfrom, pfrom, &d); - if (r < 0) { - if (ret >= 0 && r != -ENOENT) - return r; - - return ret; - } - - while ((r = cg_read_subgroup(d, &fn)) > 0) { - _cleanup_free_ char *p = NULL; - - p = path_join(empty_to_root(pfrom), fn); - free(fn); - if (!p) - return -ENOMEM; - - r = cg_migrate_recursive(cfrom, p, cto, pto, flags); - if (r != 0 && ret >= 0) - ret = r; - } - - if (r < 0 && ret >= 0) - ret = r; - - if (flags & CGROUP_REMOVE) { - r = cg_rmdir(cfrom, pfrom); - if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) - return r; - } - - return ret; -} - -int cg_migrate_recursive_fallback( - const char *cfrom, - const char *pfrom, - const char *cto, - const char *pto, - CGroupFlags flags) { - - int r; - - assert(cfrom); - assert(pfrom); - assert(cto); - assert(pto); - - r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); - if (r < 0) { - char prefix[strlen(pto) + 1]; - - /* This didn't work? Then let's try all prefixes of the destination */ - - PATH_FOREACH_PREFIX(prefix, pto) { - int q; - - q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); - if (q >= 0) - return q; - } - } - - return r; -} - static const char *controller_to_dirname(const char *controller) { const char *e; @@ -742,253 +574,6 @@ int cg_get_path_and_check(const char *controller, const char *path, const char * return cg_get_path(controller, path, suffix, fs); } -static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - assert(path); - assert(sb); - assert(ftwbuf); - - if (typeflag != FTW_DP) - return 0; - - if (ftwbuf->level < 1) - return 0; - - (void) rmdir(path); - return 0; -} - -int cg_trim(const char *controller, const char *path, bool delete_root) { - _cleanup_free_ char *fs = NULL; - int r = 0, q; - - assert(path); - - r = cg_get_path(controller, path, NULL, &fs); - if (r < 0) - return r; - - errno = 0; - if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { - if (errno == ENOENT) - r = 0; - else - r = errno_or_else(EIO); - } - - if (delete_root) { - if (rmdir(fs) < 0 && errno != ENOENT) - return -errno; - } - - q = cg_hybrid_unified(); - if (q < 0) - return q; - if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); - if (q < 0) - log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); - } - - return r; -} - -/* Create a cgroup in the hierarchy of controller. - * Returns 0 if the group already existed, 1 on success, negative otherwise. - */ -int cg_create(const char *controller, const char *path) { - _cleanup_free_ char *fs = NULL; - int r; - - r = cg_get_path_and_check(controller, path, NULL, &fs); - if (r < 0) - return r; - - r = mkdir_parents(fs, 0755); - if (r < 0) - return r; - - r = mkdir_errno_wrapper(fs, 0755); - if (r == -EEXIST) - return 0; - if (r < 0) - return r; - - r = cg_hybrid_unified(); - if (r < 0) - return r; - - if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); - if (r < 0) - log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); - } - - return 1; -} - -int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { - int r, q; - - assert(pid >= 0); - - r = cg_create(controller, path); - if (r < 0) - return r; - - q = cg_attach(controller, path, pid); - if (q < 0) - return q; - - /* This does not remove the cgroup on failure */ - return r; -} - -int cg_attach(const char *controller, const char *path, pid_t pid) { - _cleanup_free_ char *fs = NULL; - char c[DECIMAL_STR_MAX(pid_t) + 2]; - int r; - - assert(path); - assert(pid >= 0); - - r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); - if (r < 0) - return r; - - if (pid == 0) - pid = getpid_cached(); - - xsprintf(c, PID_FMT "\n", pid); - - r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); - if (r < 0) - return r; - - r = cg_hybrid_unified(); - if (r < 0) - return r; - - if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); - if (r < 0) - log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); - } - - return 0; -} - -int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { - int r; - - assert(controller); - assert(path); - assert(pid >= 0); - - r = cg_attach(controller, path, pid); - if (r < 0) { - char prefix[strlen(path) + 1]; - - /* This didn't work? Then let's try all prefixes of - * the destination */ - - PATH_FOREACH_PREFIX(prefix, path) { - int q; - - q = cg_attach(controller, prefix, pid); - if (q >= 0) - return q; - } - } - - return r; -} - -int cg_set_access( - const char *controller, - const char *path, - uid_t uid, - gid_t gid) { - - struct Attribute { - const char *name; - bool fatal; - }; - - /* cgroup v1, aka legacy/non-unified */ - static const struct Attribute legacy_attributes[] = { - { "cgroup.procs", true }, - { "tasks", false }, - { "cgroup.clone_children", false }, - {}, - }; - - /* cgroup v2, aka unified */ - static const struct Attribute unified_attributes[] = { - { "cgroup.procs", true }, - { "cgroup.subtree_control", true }, - { "cgroup.threads", false }, - {}, - }; - - static const struct Attribute* const attributes[] = { - [false] = legacy_attributes, - [true] = unified_attributes, - }; - - _cleanup_free_ char *fs = NULL; - const struct Attribute *i; - int r, unified; - - assert(path); - - if (uid == UID_INVALID && gid == GID_INVALID) - return 0; - - unified = cg_unified_controller(controller); - if (unified < 0) - return unified; - - /* Configure access to the cgroup itself */ - r = cg_get_path(controller, path, NULL, &fs); - if (r < 0) - return r; - - r = chmod_and_chown(fs, 0755, uid, gid); - if (r < 0) - return r; - - /* Configure access to the cgroup's attributes */ - for (i = attributes[unified]; i->name; i++) { - fs = mfree(fs); - - r = cg_get_path(controller, path, i->name, &fs); - if (r < 0) - return r; - - r = chmod_and_chown(fs, 0644, uid, gid); - if (r < 0) { - if (i->fatal) - return r; - - log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); - } - } - - if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { - r = cg_hybrid_unified(); - if (r < 0) - return r; - if (r > 0) { - /* Always propagate access mode from unified to legacy controller */ - r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); - if (r < 0) - log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); - } - } - - return 0; -} - int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) { _cleanup_free_ char *fs = NULL; int r; @@ -2143,194 +1728,6 @@ fail: done: memcpy(ret_values, v, sizeof(char*) * n); return 0; - -} - -int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { - CGroupController c; - CGroupMask done; - bool created; - int r; - - /* This one will create a cgroup in our private tree, but also - * duplicate it in the trees specified in mask, and remove it - * in all others. - * - * Returns 0 if the group already existed in the systemd hierarchy, - * 1 on success, negative otherwise. - */ - - /* First create the cgroup in our own hierarchy. */ - r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); - if (r < 0) - return r; - created = r; - - /* If we are in the unified hierarchy, we are done now */ - r = cg_all_unified(); - if (r < 0) - return r; - if (r > 0) - return created; - - supported &= CGROUP_MASK_V1; - mask = CGROUP_MASK_EXTEND_JOINED(mask); - done = 0; - - /* Otherwise, do the same in the other hierarchies */ - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *n; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - n = cgroup_controller_to_string(c); - if (FLAGS_SET(mask, bit)) - (void) cg_create(n, path); - else - (void) cg_trim(n, path, true); - - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return created; -} - -int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { - CGroupController c; - CGroupMask done; - int r; - - r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); - if (r < 0) - return r; - - r = cg_all_unified(); - if (r < 0) - return r; - if (r > 0) - return 0; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *p = NULL; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - if (path_callback) - p = path_callback(bit, userdata); - if (!p) - p = path; - - (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return 0; -} - -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { - Iterator i; - void *pidp; - int r = 0; - - SET_FOREACH(pidp, pids, i) { - pid_t pid = PTR_TO_PID(pidp); - int q; - - q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); - if (q < 0 && r >= 0) - r = q; - } - - return r; -} - -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { - CGroupController c; - CGroupMask done; - int r = 0, q; - - if (!path_equal(from, to)) { - r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); - if (r < 0) - return r; - } - - q = cg_all_unified(); - if (q < 0) - return q; - if (q > 0) - return r; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *p = NULL; - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - if (to_callback) - p = to_callback(bit, userdata); - if (!p) - p = to; - - (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return r; -} - -int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { - CGroupController c; - CGroupMask done; - int r, q; - - r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); - if (r < 0) - return r; - - q = cg_all_unified(); - if (q < 0) - return q; - if (q > 0) - return r; - - supported &= CGROUP_MASK_V1; - done = 0; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - - if (!FLAGS_SET(supported, bit)) - continue; - - if (FLAGS_SET(done, bit)) - continue; - - (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); - done |= CGROUP_MASK_EXTEND_JOINED(bit); - } - - return r; } int cg_mask_to_string(CGroupMask mask, char **ret) { @@ -2626,209 +2023,6 @@ int cg_hybrid_unified(void) { return r == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232; } -int cg_enable_everywhere( - CGroupMask supported, - CGroupMask mask, - const char *p, - CGroupMask *ret_result_mask) { - - _cleanup_fclose_ FILE *f = NULL; - _cleanup_free_ char *fs = NULL; - CGroupController c; - CGroupMask ret = 0; - int r; - - assert(p); - - if (supported == 0) { - if (ret_result_mask) - *ret_result_mask = 0; - return 0; - } - - r = cg_all_unified(); - if (r < 0) - return r; - if (r == 0) { - /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim - * complete success right away. (If you wonder why we return the full mask here, rather than zero: the - * caller tends to use the returned mask later on to compare if all controllers where properly joined, - * and if not requeues realization. This use is the primary purpose of the return value, hence let's - * minimize surprises here and reduce triggers for re-realization by always saying we fully - * succeeded.) */ - if (ret_result_mask) - *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with - * CGROUP_MASK_V2: The 'supported' mask - * might contain pure-V1 or BPF - * controllers, and we never want to - * claim that we could enable those with - * cgroup.subtree_control */ - return 0; - } - - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); - if (r < 0) - return r; - - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { - CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); - const char *n; - - if (!FLAGS_SET(CGROUP_MASK_V2, bit)) - continue; - - if (!FLAGS_SET(supported, bit)) - continue; - - n = cgroup_controller_to_string(c); - { - char s[1 + strlen(n) + 1]; - - s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; - strcpy(s + 1, n); - - if (!f) { - f = fopen(fs, "we"); - if (!f) - return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); - } - - r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); - if (r < 0) { - log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", - FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); - clearerr(f); - - /* If we can't turn off a controller, leave it on in the reported resulting mask. This - * happens for example when we attempt to turn off a controller up in the tree that is - * used down in the tree. */ - if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY - * only here, and not follow the same logic - * for other errors such as EINVAL or - * EOPNOTSUPP or anything else. That's - * because EBUSY indicates that the - * controllers is currently enabled and - * cannot be disabled because something down - * the hierarchy is still using it. Any other - * error most likely means something like "I - * never heard of this controller" or - * similar. In the former case it's hence - * safe to assume the controller is still on - * after the failed operation, while in the - * latter case it's safer to assume the - * controller is unknown and hence certainly - * not enabled. */ - ret |= bit; - } else { - /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ - if (FLAGS_SET(mask, bit)) - ret |= bit; - } - } - } - - /* Let's return the precise set of controllers now enabled for the cgroup. */ - if (ret_result_mask) - *ret_result_mask = ret; - - return 0; -} - -bool cg_is_unified_wanted(void) { - static thread_local int wanted = -1; - bool b; - const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; - _cleanup_free_ char *c = NULL; - int r; - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - r = cg_unified_cached(true); - if (r >= 0) - return (wanted = r >= CGROUP_UNIFIED_ALL); - - /* If we were explicitly passed systemd.unified_cgroup_hierarchy, - * respect that. */ - r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); - if (r > 0) - return (wanted = b); - - /* If we passed cgroup_no_v1=all with no other instructions, it seems - * highly unlikely that we want to use hybrid or legacy hierarchy. */ - r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); - if (r > 0 && streq_ptr(c, "all")) - return (wanted = true); - - return (wanted = is_default); -} - -bool cg_is_legacy_wanted(void) { - static thread_local int wanted = -1; - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* Check if we have cgroup v2 already mounted. */ - if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) - return (wanted = false); - - /* Otherwise, assume that at least partial legacy is wanted, - * since cgroup v2 should already be mounted at this point. */ - return (wanted = true); -} - -bool cg_is_hybrid_wanted(void) { - static thread_local int wanted = -1; - int r; - bool b; - const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; - /* We default to true if the default is "hybrid", obviously, - * but also when the default is "unified", because if we get - * called, it means that unified hierarchy was not mounted. */ - - /* If we have a cached value, return that. */ - if (wanted >= 0) - return wanted; - - /* If the hierarchy is already mounted, then follow whatever - * was chosen for it. */ - if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) - return (wanted = false); - - /* Otherwise, let's see what the kernel command line has to say. - * Since checking is expensive, cache a non-error result. */ - r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); - - /* The meaning of the kernel option is reversed wrt. to the return value - * of this function, hence the negation. */ - return (wanted = r > 0 ? !b : is_default); -} - -int cg_weight_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_WEIGHT_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = { [CGROUP_IO_RBPS_MAX] = CGROUP_LIMIT_MAX, [CGROUP_IO_WBPS_MAX] = CGROUP_LIMIT_MAX, @@ -2845,46 +2039,6 @@ static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType); -int cg_cpu_shares_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_CPU_SHARES_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - -int cg_blkio_weight_parse(const char *s, uint64_t *ret) { - uint64_t u; - int r; - - if (isempty(s)) { - *ret = CGROUP_BLKIO_WEIGHT_INVALID; - return 0; - } - - r = safe_atou64(s, &u); - if (r < 0) - return r; - - if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) - return -ERANGE; - - *ret = u; - return 0; -} - bool is_cgroup_fs(const struct statfs *s) { return is_fs_type(s, CGROUP_SUPER_MAGIC) || is_fs_type(s, CGROUP2_SUPER_MAGIC); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index ba8df8139d..1472265204 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -172,10 +172,6 @@ typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata); int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata); -int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); -int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); -int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); - int cg_split_spec(const char *spec, char **controller, char **path); int cg_mangle_path(const char *path, char **result); @@ -184,15 +180,8 @@ int cg_get_path_and_check(const char *controller, const char *path, const char * int cg_pid_get_path(const char *controller, pid_t pid, char **path); -int cg_trim(const char *controller, const char *path, bool delete_root); - int cg_rmdir(const char *controller, const char *path); -int cg_create(const char *controller, const char *path); -int cg_attach(const char *controller, const char *path, pid_t pid); -int cg_attach_fallback(const char *controller, const char *path, pid_t pid); -int cg_create_and_attach(const char *controller, const char *path, pid_t pid); - int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value); int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret); int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values); @@ -240,13 +229,6 @@ int cg_slice_to_path(const char *unit, char **ret); typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata); -int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); -int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); -int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); -int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); -int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); -int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); - int cg_mask_supported(CGroupMask *ret); int cg_mask_from_string(const char *s, CGroupMask *ret); int cg_mask_to_string(CGroupMask mask, char **ret); @@ -263,16 +245,8 @@ static inline int cg_unified(void) { return cg_unified_cached(true); } -bool cg_is_unified_wanted(void); -bool cg_is_legacy_wanted(void); -bool cg_is_hybrid_wanted(void); - const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; -int cg_weight_parse(const char *s, uint64_t *ret); -int cg_cpu_shares_parse(const char *s, uint64_t *ret); -int cg_blkio_weight_parse(const char *s, uint64_t *ret); - bool is_cgroup_fs(const struct statfs *s); bool fd_is_cgroup_fs(int fd); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 7b0a41fbc8..00690920cd 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -11,6 +11,7 @@ #include "bpf-firewall.h" #include "btrfs-util.h" #include "bus-error.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "cgroup.h" #include "fd-util.h" diff --git a/src/core/execute.c b/src/core/execute.c index 4c90007778..f769d02be4 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -48,6 +48,7 @@ #include "cap-list.h" #include "capability-util.h" #include "chown-recursive.h" +#include "cgroup-setup.h" #include "cpu-set-util.h" #include "def.h" #include "env-file.h" diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index a7951a5757..e517c8dfbf 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -24,7 +24,7 @@ #include "bus-util.h" #include "cap-list.h" #include "capability-util.h" -#include "cgroup.h" +#include "cgroup-setup.h" #include "conf-parser.h" #include "cpu-set-util.h" #include "env-util.h" diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 72cf5a2caa..790f1e234e 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -11,6 +11,7 @@ #include "bus-util.h" #include "cgroup-util.h" #include "conf-files.h" +#include "cgroup-setup.h" #include "dev-setup.h" #include "efivars.h" #include "fd-util.h" diff --git a/src/core/unit.c b/src/core/unit.c index 52a1acafab..004d787d8f 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -15,6 +15,7 @@ #include "bpf-firewall.h" #include "bus-common-errors.h" #include "bus-util.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "dbus-unit.h" #include "dbus.h" diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c index 3f762cbbc3..766d651c3f 100644 --- a/src/login/pam_systemd.c +++ b/src/login/pam_systemd.c @@ -20,7 +20,7 @@ #include "bus-error.h" #include "bus-internal.h" #include "bus-util.h" -#include "cgroup-util.h" +#include "cgroup-setup.h" #include "errno-util.h" #include "fd-util.h" #include "fileio.h" diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 0462b46413..f5048d9473 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -3,6 +3,7 @@ #include <sys/mount.h> #include "alloc-util.h" +#include "cgroup-setup.h" #include "fd-util.h" #include "fileio.h" #include "format-util.h" diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index e53b9d5ea2..286ef89878 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -5,6 +5,7 @@ #include "bus-unit-util.h" #include "bus-util.h" #include "cap-list.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "condition.h" #include "cpu-set-util.h" diff --git a/src/shared/cgroup-setup.c b/src/shared/cgroup-setup.c new file mode 100644 index 0000000000..ddcd156801 --- /dev/null +++ b/src/shared/cgroup-setup.c @@ -0,0 +1,860 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include <ftw.h> +#include <unistd.h> + +#include "cgroup-setup.h" +#include "cgroup-util.h" +#include "errno-util.h" +#include "parse-util.h" +#include "path-util.h" +#include "proc-cmdline.h" +#include "stdio-util.h" +#include "string-util.h" +#include "fs-util.h" +#include "mkdir.h" +#include "process-util.h" +#include "fileio.h" +#include "user-util.h" +#include "fd-util.h" + +bool cg_is_unified_wanted(void) { + static thread_local int wanted = -1; + bool b; + const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL; + _cleanup_free_ char *c = NULL; + int r; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + r = cg_unified_cached(true); + if (r >= 0) + return (wanted = r >= CGROUP_UNIFIED_ALL); + + /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */ + r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b); + if (r > 0) + return (wanted = b); + + /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to + * use hybrid or legacy hierarchy. */ + r = proc_cmdline_get_key("cgroup_no_v1", 0, &c); + if (r > 0 && streq_ptr(c, "all")) + return (wanted = true); + + return (wanted = is_default); +} + +bool cg_is_legacy_wanted(void) { + static thread_local int wanted = -1; + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* Check if we have cgroup v2 already mounted. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, assume that at least partial legacy is wanted, + * since cgroup v2 should already be mounted at this point. */ + return (wanted = true); +} + +bool cg_is_hybrid_wanted(void) { + static thread_local int wanted = -1; + int r; + bool b; + const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD; + /* We default to true if the default is "hybrid", obviously, but also when the default is "unified", + * because if we get called, it means that unified hierarchy was not mounted. */ + + /* If we have a cached value, return that. */ + if (wanted >= 0) + return wanted; + + /* If the hierarchy is already mounted, then follow whatever was chosen for it. */ + if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL) + return (wanted = false); + + /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache + * a non-error result. */ + r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b); + + /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the + * negation. */ + return (wanted = r > 0 ? !b : is_default); +} + +int cg_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_cpu_shares_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_CPU_SHARES_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + +int cg_blkio_weight_parse(const char *s, uint64_t *ret) { + uint64_t u; + int r; + + if (isempty(s)) { + *ret = CGROUP_BLKIO_WEIGHT_INVALID; + return 0; + } + + r = safe_atou64(s, &u); + if (r < 0) + return r; + + if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX) + return -ERANGE; + + *ret = u; + return 0; +} + + +static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { + assert(path); + assert(sb); + assert(ftwbuf); + + if (typeflag != FTW_DP) + return 0; + + if (ftwbuf->level < 1) + return 0; + + (void) rmdir(path); + return 0; +} + +int cg_trim(const char *controller, const char *path, bool delete_root) { + _cleanup_free_ char *fs = NULL; + int r = 0, q; + + assert(path); + + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + errno = 0; + if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) { + if (errno == ENOENT) + r = 0; + else + r = errno_or_else(EIO); + } + + if (delete_root) { + if (rmdir(fs) < 0 && errno != ENOENT) + return -errno; + } + + q = cg_hybrid_unified(); + if (q < 0) + return q; + if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root); + if (q < 0) + log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path); + } + + return r; +} + +/* Create a cgroup in the hierarchy of controller. + * Returns 0 if the group already existed, 1 on success, negative otherwise. + */ +int cg_create(const char *controller, const char *path) { + _cleanup_free_ char *fs = NULL; + int r; + + r = cg_get_path_and_check(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = mkdir_parents(fs, 0755); + if (r < 0) + return r; + + r = mkdir_errno_wrapper(fs, 0755); + if (r == -EEXIST) + return 0; + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path); + if (r < 0) + log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path); + } + + return 1; +} + +int cg_create_and_attach(const char *controller, const char *path, pid_t pid) { + int r, q; + + assert(pid >= 0); + + r = cg_create(controller, path); + if (r < 0) + return r; + + q = cg_attach(controller, path, pid); + if (q < 0) + return q; + + /* This does not remove the cgroup on failure */ + return r; +} + +int cg_attach(const char *controller, const char *path, pid_t pid) { + _cleanup_free_ char *fs = NULL; + char c[DECIMAL_STR_MAX(pid_t) + 2]; + int r; + + assert(path); + assert(pid >= 0); + + r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs); + if (r < 0) + return r; + + if (pid == 0) + pid = getpid_cached(); + + xsprintf(c, PID_FMT "\n", pid); + + r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + + r = cg_hybrid_unified(); + if (r < 0) + return r; + + if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid); + if (r < 0) + log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path); + } + + return 0; +} + +int cg_attach_fallback(const char *controller, const char *path, pid_t pid) { + int r; + + assert(controller); + assert(path); + assert(pid >= 0); + + r = cg_attach(controller, path, pid); + if (r < 0) { + char prefix[strlen(path) + 1]; + + /* This didn't work? Then let's try all prefixes of + * the destination */ + + PATH_FOREACH_PREFIX(prefix, path) { + int q; + + q = cg_attach(controller, prefix, pid); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_set_access( + const char *controller, + const char *path, + uid_t uid, + gid_t gid) { + + struct Attribute { + const char *name; + bool fatal; + }; + + /* cgroup v1, aka legacy/non-unified */ + static const struct Attribute legacy_attributes[] = { + { "cgroup.procs", true }, + { "tasks", false }, + { "cgroup.clone_children", false }, + {}, + }; + + /* cgroup v2, aka unified */ + static const struct Attribute unified_attributes[] = { + { "cgroup.procs", true }, + { "cgroup.subtree_control", true }, + { "cgroup.threads", false }, + {}, + }; + + static const struct Attribute* const attributes[] = { + [false] = legacy_attributes, + [true] = unified_attributes, + }; + + _cleanup_free_ char *fs = NULL; + const struct Attribute *i; + int r, unified; + + assert(path); + + if (uid == UID_INVALID && gid == GID_INVALID) + return 0; + + unified = cg_unified_controller(controller); + if (unified < 0) + return unified; + + /* Configure access to the cgroup itself */ + r = cg_get_path(controller, path, NULL, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0755, uid, gid); + if (r < 0) + return r; + + /* Configure access to the cgroup's attributes */ + for (i = attributes[unified]; i->name; i++) { + fs = mfree(fs); + + r = cg_get_path(controller, path, i->name, &fs); + if (r < 0) + return r; + + r = chmod_and_chown(fs, 0644, uid, gid); + if (r < 0) { + if (i->fatal) + return r; + + log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs); + } + } + + if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { + r = cg_hybrid_unified(); + if (r < 0) + return r; + if (r > 0) { + /* Always propagate access mode from unified to legacy controller */ + r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid); + if (r < 0) + log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path); + } + } + + return 0; +} + +int cg_migrate( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + bool done = false; + _cleanup_set_free_ Set *s = NULL; + int r, ret = 0; + pid_t my_pid; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + s = set_new(NULL); + if (!s) + return -ENOMEM; + + my_pid = getpid_cached(); + + do { + _cleanup_fclose_ FILE *f = NULL; + pid_t pid = 0; + done = true; + + r = cg_enumerate_processes(cfrom, pfrom, &f); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_pid(f, &pid)) > 0) { + + /* This might do weird stuff if we aren't a + * single-threaded program. However, we + * luckily know we are not */ + if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid) + continue; + + if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) + continue; + + /* Ignore kernel threads. Since they can only + * exist in the root cgroup, we only check for + * them there. */ + if (cfrom && + empty_or_root(pfrom) && + is_kernel_thread(pid) > 0) + continue; + + r = cg_attach(cto, pto, pid); + if (r < 0) { + if (ret >= 0 && r != -ESRCH) + ret = r; + } else if (ret == 0) + ret = 1; + + done = false; + + r = set_put(s, PID_TO_PTR(pid)); + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } + + if (r < 0) { + if (ret >= 0) + return r; + + return ret; + } + } while (!done); + + return ret; +} + +int cg_migrate_recursive( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + _cleanup_closedir_ DIR *d = NULL; + int r, ret = 0; + char *fn; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + ret = cg_migrate(cfrom, pfrom, cto, pto, flags); + + r = cg_enumerate_subgroups(cfrom, pfrom, &d); + if (r < 0) { + if (ret >= 0 && r != -ENOENT) + return r; + + return ret; + } + + while ((r = cg_read_subgroup(d, &fn)) > 0) { + _cleanup_free_ char *p = NULL; + + p = path_join(empty_to_root(pfrom), fn); + free(fn); + if (!p) + return -ENOMEM; + + r = cg_migrate_recursive(cfrom, p, cto, pto, flags); + if (r != 0 && ret >= 0) + ret = r; + } + + if (r < 0 && ret >= 0) + ret = r; + + if (flags & CGROUP_REMOVE) { + r = cg_rmdir(cfrom, pfrom); + if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) + return r; + } + + return ret; +} + +int cg_migrate_recursive_fallback( + const char *cfrom, + const char *pfrom, + const char *cto, + const char *pto, + CGroupFlags flags) { + + int r; + + assert(cfrom); + assert(pfrom); + assert(cto); + assert(pto); + + r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags); + if (r < 0) { + char prefix[strlen(pto) + 1]; + + /* This didn't work? Then let's try all prefixes of the destination */ + + PATH_FOREACH_PREFIX(prefix, pto) { + int q; + + q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags); + if (q >= 0) + return q; + } + } + + return r; +} + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) { + CGroupController c; + CGroupMask done; + bool created; + int r; + + /* This one will create a cgroup in our private tree, but also + * duplicate it in the trees specified in mask, and remove it + * in all others. + * + * Returns 0 if the group already existed in the systemd hierarchy, + * 1 on success, negative otherwise. + */ + + /* First create the cgroup in our own hierarchy. */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path); + if (r < 0) + return r; + created = r; + + /* If we are in the unified hierarchy, we are done now */ + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return created; + + supported &= CGROUP_MASK_V1; + mask = CGROUP_MASK_EXTEND_JOINED(mask); + done = 0; + + /* Otherwise, do the same in the other hierarchies */ + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + n = cgroup_controller_to_string(c); + if (FLAGS_SET(mask, bit)) + (void) cg_create(n, path); + else + (void) cg_trim(n, path, true); + + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return created; +} + +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r; + + r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid); + if (r < 0) + return r; + + r = cg_all_unified(); + if (r < 0) + return r; + if (r > 0) + return 0; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (path_callback) + p = path_callback(bit, userdata); + if (!p) + p = path; + + (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return 0; +} + +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) { + Iterator i; + void *pidp; + int r = 0; + + SET_FOREACH(pidp, pids, i) { + pid_t pid = PTR_TO_PID(pidp); + int q; + + q = cg_attach_everywhere(supported, path, pid, path_callback, userdata); + if (q < 0 && r >= 0) + r = q; + } + + return r; +} + +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) { + CGroupController c; + CGroupMask done; + int r = 0, q; + + if (!path_equal(from, to)) { + r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE); + if (r < 0) + return r; + } + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *p = NULL; + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + if (to_callback) + p = to_callback(bit, userdata); + if (!p) + p = to; + + (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) { + CGroupController c; + CGroupMask done; + int r, q; + + r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root); + if (r < 0) + return r; + + q = cg_all_unified(); + if (q < 0) + return q; + if (q > 0) + return r; + + supported &= CGROUP_MASK_V1; + done = 0; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + + if (!FLAGS_SET(supported, bit)) + continue; + + if (FLAGS_SET(done, bit)) + continue; + + (void) cg_trim(cgroup_controller_to_string(c), path, delete_root); + done |= CGROUP_MASK_EXTEND_JOINED(bit); + } + + return r; +} + +int cg_enable_everywhere( + CGroupMask supported, + CGroupMask mask, + const char *p, + CGroupMask *ret_result_mask) { + + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *fs = NULL; + CGroupController c; + CGroupMask ret = 0; + int r; + + assert(p); + + if (supported == 0) { + if (ret_result_mask) + *ret_result_mask = 0; + return 0; + } + + r = cg_all_unified(); + if (r < 0) + return r; + if (r == 0) { + /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim + * complete success right away. (If you wonder why we return the full mask here, rather than zero: the + * caller tends to use the returned mask later on to compare if all controllers where properly joined, + * and if not requeues realization. This use is the primary purpose of the return value, hence let's + * minimize surprises here and reduce triggers for re-realization by always saying we fully + * succeeded.) */ + if (ret_result_mask) + *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with + * CGROUP_MASK_V2: The 'supported' mask + * might contain pure-V1 or BPF + * controllers, and we never want to + * claim that we could enable those with + * cgroup.subtree_control */ + return 0; + } + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); + if (r < 0) + return r; + + for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) { + CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c); + const char *n; + + if (!FLAGS_SET(CGROUP_MASK_V2, bit)) + continue; + + if (!FLAGS_SET(supported, bit)) + continue; + + n = cgroup_controller_to_string(c); + { + char s[1 + strlen(n) + 1]; + + s[0] = FLAGS_SET(mask, bit) ? '+' : '-'; + strcpy(s + 1, n); + + if (!f) { + f = fopen(fs, "we"); + if (!f) + return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p); + } + + r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", + FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); + clearerr(f); + + /* If we can't turn off a controller, leave it on in the reported resulting mask. This + * happens for example when we attempt to turn off a controller up in the tree that is + * used down in the tree. */ + if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY + * only here, and not follow the same logic + * for other errors such as EINVAL or + * EOPNOTSUPP or anything else. That's + * because EBUSY indicates that the + * controllers is currently enabled and + * cannot be disabled because something down + * the hierarchy is still using it. Any other + * error most likely means something like "I + * never heard of this controller" or + * similar. In the former case it's hence + * safe to assume the controller is still on + * after the failed operation, while in the + * latter case it's safer to assume the + * controller is unknown and hence certainly + * not enabled. */ + ret |= bit; + } else { + /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ + if (FLAGS_SET(mask, bit)) + ret |= bit; + } + } + } + + /* Let's return the precise set of controllers now enabled for the cgroup. */ + if (ret_result_mask) + *ret_result_mask = ret; + + return 0; +} diff --git a/src/shared/cgroup-setup.h b/src/shared/cgroup-setup.h new file mode 100644 index 0000000000..6e9b6857d8 --- /dev/null +++ b/src/shared/cgroup-setup.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +#include "cgroup-util.h" + +bool cg_is_unified_wanted(void); +bool cg_is_legacy_wanted(void); +bool cg_is_hybrid_wanted(void); + +int cg_weight_parse(const char *s, uint64_t *ret); +int cg_cpu_shares_parse(const char *s, uint64_t *ret); +int cg_blkio_weight_parse(const char *s, uint64_t *ret); + +int cg_trim(const char *controller, const char *path, bool delete_root); + +int cg_create(const char *controller, const char *path); +int cg_attach(const char *controller, const char *path, pid_t pid); +int cg_attach_fallback(const char *controller, const char *path, pid_t pid); +int cg_create_and_attach(const char *controller, const char *path, pid_t pid); + +int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); +int cg_migrate_recursive_fallback(const char *cfrom, const char *pfrom, const char *cto, const char *pto, CGroupFlags flags); + +int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path); +int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t callback, void *userdata); +int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); +int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); +int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); diff --git a/src/shared/meson.build b/src/shared/meson.build index e9005a30e3..63a3f88d50 100644 --- a/src/shared/meson.build +++ b/src/shared/meson.build @@ -33,6 +33,8 @@ shared_sources = files(''' bus-wait-for-units.h calendarspec.c calendarspec.h + cgroup-setup.c + cgroup-setup.h cgroup-show.c cgroup-show.h clean-ipc.c diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c index 0eb17989d0..08215fd3ee 100644 --- a/src/shutdown/shutdown.c +++ b/src/shutdown/shutdown.c @@ -17,6 +17,7 @@ #include "alloc-util.h" #include "async.h" +#include "cgroup-setup.h" #include "cgroup-util.h" #include "def.h" #include "exec-util.h" diff --git a/src/test/meson.build b/src/test/meson.build index a8c3e59098..5132145b41 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -615,6 +615,10 @@ tests += [ [], []], + [['src/test/test-cgroup-setup.c'], + [], + []], + [['src/test/test-env-file.c'], [], []], diff --git a/src/test/test-cgroup-setup.c b/src/test/test-cgroup-setup.c new file mode 100644 index 0000000000..330631a910 --- /dev/null +++ b/src/test/test-cgroup-setup.c @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "alloc-util.h" +#include "build.h" +#include "cgroup-setup.h" +#include "log.h" +#include "proc-cmdline.h" +#include "string-util.h" +#include "tests.h" + +static void test_is_wanted_print(bool header) { + _cleanup_free_ char *cmdline = NULL; + + log_info("-- %s --", __func__); + assert_se(proc_cmdline(&cmdline) >= 0); + log_info("cmdline: %s", cmdline); + if (header) { + log_info(_CGROUP_HIERARCHY_); + (void) system("findmnt -n /sys/fs/cgroup"); + } + + log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); + log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); + log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); + log_info(" "); +} + +static void test_is_wanted(void) { + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "systemd.unified_cgroup_hierarchy=0 " + "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); + test_is_wanted_print(false); + + /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise + * explicitly specified. */ + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "cgroup_no_v1=all", 1) >= 0); + test_is_wanted_print(false); + + assert_se(setenv("SYSTEMD_PROC_CMDLINE", + "cgroup_no_v1=all " + "systemd.unified_cgroup_hierarchy=0", 1) >= 0); + test_is_wanted_print(false); +} + +int main(void) { + test_setup_logging(LOG_DEBUG); + + test_is_wanted_print(true); + test_is_wanted_print(false); /* run twice to test caching */ + test_is_wanted(); + + return 0; +} diff --git a/src/test/test-cgroup-util.c b/src/test/test-cgroup-util.c index f45c5ff760..e4fbb9edce 100644 --- a/src/test/test-cgroup-util.c +++ b/src/test/test-cgroup-util.c @@ -333,55 +333,6 @@ static void test_fd_is_cgroup_fs(void) { fd = safe_close(fd); } -static void test_is_wanted_print(bool header) { - _cleanup_free_ char *cmdline = NULL; - - log_info("-- %s --", __func__); - assert_se(proc_cmdline(&cmdline) >= 0); - log_info("cmdline: %s", cmdline); - if (header) { - - log_info(_CGROUP_HIERARCHY_); - (void) system("findmnt -n /sys/fs/cgroup"); - } - - log_info("is_unified_wanted() → %s", yes_no(cg_is_unified_wanted())); - log_info("is_hybrid_wanted() → %s", yes_no(cg_is_hybrid_wanted())); - log_info("is_legacy_wanted() → %s", yes_no(cg_is_legacy_wanted())); - log_info(" "); -} - -static void test_is_wanted(void) { - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0 " - "systemd.legacy_systemd_cgroup_controller", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "systemd.unified_cgroup_hierarchy=0 " - "systemd.legacy_systemd_cgroup_controller=0", 1) >= 0); - test_is_wanted_print(false); - - /* cgroup_no_v1=all implies unified cgroup hierarchy, unless otherwise - * explicitly specified. */ - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "cgroup_no_v1=all", 1) >= 0); - test_is_wanted_print(false); - - assert_se(setenv("SYSTEMD_PROC_CMDLINE", - "cgroup_no_v1=all " - "systemd.unified_cgroup_hierarchy=0", 1) >= 0); - test_is_wanted_print(false); -} - static void test_cg_tests(void) { int all, hybrid, systemd, r; @@ -477,9 +428,6 @@ int main(void) { TEST_REQ_RUNNING_SYSTEMD(test_mask_supported()); TEST_REQ_RUNNING_SYSTEMD(test_is_cgroup_fs()); TEST_REQ_RUNNING_SYSTEMD(test_fd_is_cgroup_fs()); - test_is_wanted_print(true); - test_is_wanted_print(false); /* run twice to test caching */ - test_is_wanted(); test_cg_tests(); test_cg_get_keyed_attribute(); diff --git a/src/test/test-cgroup.c b/src/test/test-cgroup.c index 5cdfd2dc54..1891df0eb9 100644 --- a/src/test/test-cgroup.c +++ b/src/test/test-cgroup.c @@ -3,6 +3,7 @@ #include <string.h> #include <unistd.h> +#include "cgroup-setup.h" #include "cgroup-util.h" #include "path-util.h" #include "process-util.h" diff --git a/src/test/test-helper.c b/src/test/test-helper.c index 5b79d12f07..dc8c80a14b 100644 --- a/src/test/test-helper.c +++ b/src/test/test-helper.c @@ -3,7 +3,7 @@ #include "test-helper.h" #include "random-util.h" #include "alloc-util.h" -#include "cgroup-util.h" +#include "cgroup-setup.h" #include "string-util.h" int enter_cgroup_subroot(void) { |