diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3a1cb265afd65a00b9fe7d6198be0a1e5f089c05..4829a577c1b91748d6f88b2a3436a41e0af8b836 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -350,6 +350,7 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ + struct list_head mg_preload_node; struct list_head mg_node; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 23e3a8c74bd4938995f263d5fc76e46ce766bedf..a93f6f1ebc69bfda2c24a728cbb8a6e85dd7cafd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -644,6 +644,7 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->cgrp_links); INIT_LIST_HEAD(&cset->tasks); INIT_LIST_HEAD(&cset->mg_tasks); + INIT_LIST_HEAD(&cset->mg_preload_node); INIT_LIST_HEAD(&cset->mg_node); INIT_HLIST_NODE(&cset->hlist); @@ -1755,16 +1756,137 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, } /** - * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup - * @cgrp: the cgroup to attach to - * @leader: the task or the leader of the threadgroup to be attached - * @threadgroup: attach the whole threadgroup? + * cgroup_migrate_finish - cleanup after attach + * @preloaded_csets: list of preloaded css_sets * - * Call holding cgroup_mutex and the group_rwsem of the leader. Will take - * task_lock of @tsk or each thread in the threadgroup individually in turn. + * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See + * those functions for details. */ -static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, - bool threadgroup) +static void cgroup_migrate_finish(struct list_head *preloaded_csets) +{ + struct css_set *cset, *tmp_cset; + + lockdep_assert_held(&cgroup_mutex); + + down_write(&css_set_rwsem); + list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_preload_node); + put_css_set_locked(cset, false); + } + up_write(&css_set_rwsem); +} + +/** + * cgroup_migrate_add_src - add a migration source css_set + * @src_cset: the source css_set to add + * @dst_cgrp: the destination cgroup + * @preloaded_csets: list of preloaded css_sets + * + * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin + * @src_cset and add it to @preloaded_csets, which should later be cleaned + * up by cgroup_migrate_finish(). + * + * This function may be called without holding threadgroup_lock even if the + * target is a process. Threads may be created and destroyed but as long + * as cgroup_mutex is not dropped, no new css_set can be put into play and + * the preloaded css_sets are guaranteed to cover all migrations. + */ +static void cgroup_migrate_add_src(struct css_set *src_cset, + struct cgroup *dst_cgrp, + struct list_head *preloaded_csets) +{ + struct cgroup *src_cgrp; + + lockdep_assert_held(&cgroup_mutex); + lockdep_assert_held(&css_set_rwsem); + + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); + + /* nothing to do if this cset already belongs to the cgroup */ + if (src_cgrp == dst_cgrp) + return; + + if (!list_empty(&src_cset->mg_preload_node)) + return; + + WARN_ON(src_cset->mg_src_cgrp); + WARN_ON(!list_empty(&src_cset->mg_tasks)); + WARN_ON(!list_empty(&src_cset->mg_node)); + + src_cset->mg_src_cgrp = src_cgrp; + get_css_set(src_cset); + list_add(&src_cset->mg_preload_node, preloaded_csets); +} + +/** + * cgroup_migrate_prepare_dst - prepare destination css_sets for migration + * @dst_cgrp: the destination cgroup + * @preloaded_csets: list of preloaded source css_sets + * + * Tasks are about to be moved to @dst_cgrp and all the source css_sets + * have been preloaded to @preloaded_csets. This function looks up and + * pins all destination css_sets, links each to its source, and put them on + * @preloaded_csets. + * + * This function must be called after cgroup_migrate_add_src() has been + * called on each migration source css_set. After migration is performed + * using cgroup_migrate(), cgroup_migrate_finish() must be called on + * @preloaded_csets. + */ +static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, + struct list_head *preloaded_csets) +{ + LIST_HEAD(csets); + struct css_set *src_cset; + + lockdep_assert_held(&cgroup_mutex); + + /* look up the dst cset for each src cset and link it to src */ + list_for_each_entry(src_cset, preloaded_csets, mg_preload_node) { + struct css_set *dst_cset; + + dst_cset = find_css_set(src_cset, dst_cgrp); + if (!dst_cset) + goto err; + + WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); + src_cset->mg_dst_cset = dst_cset; + + if (list_empty(&dst_cset->mg_preload_node)) + list_add(&dst_cset->mg_preload_node, &csets); + else + put_css_set(dst_cset, false); + } + + list_splice(&csets, preloaded_csets); + return 0; +err: + cgroup_migrate_finish(&csets); + return -ENOMEM; +} + +/** + * cgroup_migrate - migrate a process or task to a cgroup + * @cgrp: the destination cgroup + * @leader: the leader of the process or the task to migrate + * @threadgroup: whether @leader points to the whole process or a single task + * + * Migrate a process or task denoted by @leader to @cgrp. If migrating a + * process, the caller must be holding threadgroup_lock of @leader. The + * caller is also responsible for invoking cgroup_migrate_add_src() and + * cgroup_migrate_prepare_dst() on the targets before invoking this + * function and following up with cgroup_migrate_finish(). + * + * As long as a controller's ->can_attach() doesn't fail, this function is + * guaranteed to succeed. This means that, excluding ->can_attach() + * failure, when migrating multiple targets, the success or failure can be + * decided for all targets by invoking group_migrate_prepare_dst() before + * actually starting migrating. + */ +static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader, + bool threadgroup) { struct cgroup_taskset tset = { .src_csets = LIST_HEAD_INIT(tset.src_csets), @@ -1785,29 +1907,17 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, rcu_read_lock(); task = leader; do { - struct cgroup *src_cgrp; - /* @task either already exited or can't exit until the end */ if (task->flags & PF_EXITING) goto next; cset = task_css_set(task); - src_cgrp = task_cgroup_from_root(task, cgrp->root); - - /* nothing to do if this task is already in the cgroup */ - if (src_cgrp == cgrp) + if (!cset->mg_src_cgrp) goto next; - if (!cset->mg_src_cgrp) { - WARN_ON(!list_empty(&cset->mg_tasks)); - WARN_ON(!list_empty(&cset->mg_node)); - - cset->mg_src_cgrp = src_cgrp; - list_add(&cset->mg_node, &tset.src_csets); - get_css_set(cset); - } - list_move(&task->cg_list, &cset->mg_tasks); + list_move(&cset->mg_node, &tset.src_csets); + list_move(&cset->mg_dst_cset->mg_node, &tset.dst_csets); next: if (!threadgroup) break; @@ -1819,9 +1929,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, if (list_empty(&tset.src_csets)) return 0; - /* - * step 1: check that we can legitimately attach to the cgroup. - */ + /* check that we can legitimately attach to the cgroup */ for_each_css(css, i, cgrp) { if (css->ss->can_attach) { ret = css->ss->can_attach(css, &tset); @@ -1833,30 +1941,9 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, } /* - * step 2: make sure css_sets exist for all threads to be migrated. - * we use find_css_set, which allocates a new one if necessary. - */ - list_for_each_entry(cset, &tset.src_csets, mg_node) { - struct css_set *dst_cset; - - dst_cset = find_css_set(cset, cgrp); - if (!dst_cset) { - ret = -ENOMEM; - goto out_release_tset; - } - - if (list_empty(&dst_cset->mg_node)) - list_add(&dst_cset->mg_node, &tset.dst_csets); - else - put_css_set(dst_cset, false); - - cset->mg_dst_cset = dst_cset; - } - - /* - * step 3: now that we're guaranteed success wrt the css_sets, - * proceed to move all tasks to the new cgroup. There are no - * failure cases after here, so this is the commit point. + * Now that we're guaranteed success, proceed to move all tasks to + * the new cgroup. There are no failure cases after here, so this + * is the commit point. */ down_write(&css_set_rwsem); list_for_each_entry(cset, &tset.src_csets, mg_node) { @@ -1866,14 +1953,13 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, } up_write(&css_set_rwsem); - /* migration is committed, all target tasks are now on dst_csets */ - tset.csets = &tset.dst_csets; - - /* nothing is sensitive to fork() after this point */ - /* - * step 4: do subsystem attach callbacks. + * Migration is committed, all target tasks are now on dst_csets. + * Nothing is sensitive to fork() after this point. Notify + * controllers that migration is complete. */ + tset.csets = &tset.dst_csets; + for_each_css(css, i, cgrp) if (css->ss->attach) css->ss->attach(css, &tset); @@ -1893,15 +1979,50 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader, list_splice_init(&tset.dst_csets, &tset.src_csets); list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) { list_splice_init(&cset->mg_tasks, &cset->tasks); - cset->mg_dst_cset = NULL; - cset->mg_src_cgrp = NULL; list_del_init(&cset->mg_node); - put_css_set_locked(cset, false); } up_write(&css_set_rwsem); return ret; } +/** + * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup + * @dst_cgrp: the cgroup to attach to + * @leader: the task or the leader of the threadgroup to be attached + * @threadgroup: attach the whole threadgroup? + * + * Call holding cgroup_mutex and the group_rwsem of the leader. Will take + * task_lock of @tsk or each thread in the threadgroup individually in turn. + */ +static int cgroup_attach_task(struct cgroup *dst_cgrp, + struct task_struct *leader, bool threadgroup) +{ + LIST_HEAD(preloaded_csets); + struct task_struct *task; + int ret; + + /* look up all src csets */ + down_read(&css_set_rwsem); + rcu_read_lock(); + task = leader; + do { + cgroup_migrate_add_src(task_css_set(task), dst_cgrp, + &preloaded_csets); + if (!threadgroup) + break; + } while_each_thread(leader, task); + rcu_read_unlock(); + up_read(&css_set_rwsem); + + /* prepare dst csets and commit */ + ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); + if (!ret) + ret = cgroup_migrate(dst_cgrp, leader, threadgroup); + + cgroup_migrate_finish(&preloaded_csets); + return ret; +} + /* * Find the task_struct of the task to attach by vpid and pass it along to the * function to attach either it or all tasks in its threadgroup. Will lock @@ -3906,6 +4027,7 @@ int __init cgroup_init_early(void) INIT_LIST_HEAD(&init_css_set.cgrp_links); INIT_LIST_HEAD(&init_css_set.tasks); INIT_LIST_HEAD(&init_css_set.mg_tasks); + INIT_LIST_HEAD(&init_css_set.mg_preload_node); INIT_LIST_HEAD(&init_css_set.mg_node); INIT_HLIST_NODE(&init_css_set.hlist); css_set_count = 1;