提交 5406812e 编写于 作者: L Linus Torvalds

Merge branch 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "More change than I'd have liked at this stage.  The pids controller
  and the changes made to cgroup core to support it introduced and
  revealed several important issues.

   - Assigning membership to a newly created task and migrating it can
     race leading to incorrect accounting.  Oleg fixed it by widening
     threadgroup synchronization.  It looks like we'll be able to merge
     it with a different percpu rwsem which is used in fork path making
     things simpler and cheaper.

   - The recent change to extend cgroup membership to zombies (so that
     pid accounting can extend till the pid is actually released) missed
     pinning the underlying data structures leading to use-after-free.
     Fixed.

   - v2 hierarchy was calling subsystem callbacks with the wrong target
     cgroup_subsys_state based on the incorrect assumption that they
     share the same target.  pids is the first controller affected by
     this.  Subsys callbacks updated so that they can deal with
     multi-target migrations"

* 'for-4.4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup_pids: don't account for the root cgroup
  cgroup: fix handling of multi-destination migration from subtree_control enabling
  cgroup_freezer: simplify propagation of CGROUP_FROZEN clearing in freezer_attach()
  cgroup: pids: kill pids_fork(), simplify pids_can_fork() and pids_cancel_fork()
  cgroup: pids: fix race between cgroup_post_fork() and cgroup_migrate()
  cgroup: make css_set pin its css's to avoid use-afer-free
  cgroup: fix cftype->file_offset handling
......@@ -1127,15 +1127,15 @@ void blkcg_exit_queue(struct request_queue *q)
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
static int blkcg_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int blkcg_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *dst_css;
struct io_context *ioc;
int ret = 0;
/* task_lock() is needed to avoid races with exit_io_context() */
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, dst_css, tset) {
task_lock(task);
ioc = task->io_context;
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
......
......@@ -90,7 +90,6 @@ enum {
*/
struct cgroup_file {
/* do not access any fields from outside cgroup core */
struct list_head node; /* anchored at css->files */
struct kernfs_node *kn;
};
......@@ -134,9 +133,6 @@ struct cgroup_subsys_state {
*/
u64 serial_nr;
/* all cgroup_files associated with this css */
struct list_head files;
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
......@@ -426,12 +422,9 @@ struct cgroup_subsys {
void (*css_reset)(struct cgroup_subsys_state *css);
void (*css_e_css_changed)(struct cgroup_subsys_state *css);
int (*can_attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
void (*attach)(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset);
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset);
int (*can_fork)(struct task_struct *task, void **priv_p);
void (*cancel_fork)(struct task_struct *task, void *priv);
void (*fork)(struct task_struct *task, void *priv);
......
......@@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
......@@ -119,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp);
void css_task_iter_start(struct cgroup_subsys_state *css,
struct css_task_iter *it);
......@@ -235,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
* @dst_css: the destination css
* @tset: taskset to iterate
*
* @tset may contain multiple tasks and they may belong to multiple
* processes. When there are multiple tasks in @tset, if a task of a
* process is in @tset, all tasks of the process are in @tset. Also, all
* are guaranteed to share the same source and destination csses.
* processes.
*
* On the v2 hierarchy, there may be tasks from multiple processes and they
* may not share the source or destination csses.
*
* On traditional hierarchies, when there are multiple tasks in @tset, if a
* task of a process is in @tset, all tasks of the process are in @tset.
* Also, all are guaranteed to share the same source and destination csses.
*
* Iteration is not in any specific order.
*/
#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset)))
#define cgroup_taskset_for_each(task, dst_css, tset) \
for ((task) = cgroup_taskset_first((tset), &(dst_css)); \
(task); \
(task) = cgroup_taskset_next((tset), &(dst_css)))
/**
* cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
* @leader: the loop cursor
* @dst_css: the destination css
* @tset: takset to iterate
*
* Iterate threadgroup leaders of @tset. For single-task migrations, @tset
* may not contain any.
*/
#define cgroup_taskset_for_each_leader(leader, tset) \
for ((leader) = cgroup_taskset_first((tset)); (leader); \
(leader) = cgroup_taskset_next((tset))) \
#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \
for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \
(leader); \
(leader) = cgroup_taskset_next((tset), &(dst_css))) \
if ((leader) != (leader)->group_leader) \
; \
else
......@@ -516,19 +528,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
pr_cont_kernfs_path(cgrp->kn);
}
/**
* cgroup_file_notify - generate a file modified event for a cgroup_file
* @cfile: target cgroup_file
*
* @cfile must have been obtained by setting cftype->file_offset.
*/
static inline void cgroup_file_notify(struct cgroup_file *cfile)
{
/* might not have been created due to one of the CFTYPE selector flags */
if (cfile->kn)
kernfs_notify(cfile->kn);
}
#else /* !CONFIG_CGROUPS */
struct cgroup_subsys_state;
......
......@@ -97,6 +97,12 @@ static DEFINE_SPINLOCK(css_set_lock);
*/
static DEFINE_SPINLOCK(cgroup_idr_lock);
/*
* Protects cgroup_file->kn for !self csses. It synchronizes notifications
* against file removal/re-creation across css hiding.
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);
/*
* Protects cgroup_subsys->release_agent_path. Modifying it also requires
* cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
......@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
if (!atomic_dec_and_test(&cset->refcount))
return;
/* This css_set is dead. unlink it and release cgroup refcounts */
for_each_subsys(ss, ssid)
/* This css_set is dead. unlink it and release cgroup and css refs */
for_each_subsys(ss, ssid) {
list_del(&cset->e_cset_node[ssid]);
css_put(cset->subsys[ssid]);
}
hash_del(&cset->hlist);
css_set_count--;
......@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
for_each_subsys(ss, ssid)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cset->subsys[ssid];
list_add_tail(&cset->e_cset_node[ssid],
&cset->subsys[ssid]->cgroup->e_csets[ssid]);
&css->cgroup->e_csets[ssid]);
css_get(css);
}
spin_unlock_bh(&css_set_lock);
......@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
char name[CGROUP_FILE_NAME_MAX];
lockdep_assert_held(&cgroup_mutex);
if (cft->file_offset) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
struct cgroup_file *cfile = (void *)css + cft->file_offset;
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}
......@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->self.sibling);
INIT_LIST_HEAD(&cgrp->self.children);
INIT_LIST_HEAD(&cgrp->self.files);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
......@@ -2216,6 +2237,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;
/* the subsys currently being processed */
int ssid;
/*
* Fields for cgroup_taskset_*() iteration.
*
......@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
* @dst_cssp: output variable for the destination css
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
return cgroup_taskset_next(tset);
return cgroup_taskset_next(tset, dst_cssp);
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
* @dst_cssp: output variable for the destination css
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
......@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
/*
* This function may be called both before and
* after cgroup_taskset_migrate(). The two cases
* can be distinguished by looking at whether @cset
* has its ->mg_dst_cset set.
*/
if (cset->mg_dst_cset)
*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
else
*dst_cssp = cset->subsys[tset->ssid];
return task;
}
......@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->can_attach) {
ret = css->ss->can_attach(css, tset);
tset->ssid = i;
ret = css->ss->can_attach(tset);
if (ret) {
failed_css = css;
goto out_cancel_attach;
......@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
*/
tset->csets = &tset->dst_csets;
for_each_e_css(css, i, dst_cgrp)
if (css->ss->attach)
css->ss->attach(css, tset);
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->attach) {
tset->ssid = i;
css->ss->attach(tset);
}
}
ret = 0;
goto out_release_tset;
......@@ -2390,8 +2434,10 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
for_each_e_css(css, i, dst_cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, tset);
if (css->ss->cancel_attach) {
tset->ssid = i;
css->ss->cancel_attach(tset);
}
}
out_release_tset:
spin_lock_bh(&css_set_lock);
......@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;
kernfs_get(kn);
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
list_add(&cfile->node, &css->files);
spin_unlock_irq(&cgroup_file_kn_lock);
}
return 0;
......@@ -3552,6 +3598,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
return cgroup_add_cftypes(ss, cfts);
}
/**
* cgroup_file_notify - generate a file modified event for a cgroup_file
* @cfile: target cgroup_file
*
* @cfile must have been obtained by setting cftype->file_offset.
*/
void cgroup_file_notify(struct cgroup_file *cfile)
{
unsigned long flags;
spin_lock_irqsave(&cgroup_file_kn_lock, flags);
if (cfile->kn)
kernfs_notify(cfile->kn);
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
......@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
struct cgroup_file *cfile;
percpu_ref_exit(&css->refcnt);
list_for_each_entry(cfile, &css->files, node)
kernfs_put(cfile->kn);
if (ss) {
/* css free path */
int id = css->id;
......@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->ss = ss;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
INIT_LIST_HEAD(&css->files);
css->serial_nr = css_serial_nr_next++;
if (cgroup_parent(cgrp)) {
......
......@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
static void freezer_attach(struct cgroup_subsys_state *new_css,
struct cgroup_taskset *tset)
static void freezer_attach(struct cgroup_taskset *tset)
{
struct freezer *freezer = css_freezer(new_css);
struct task_struct *task;
bool clear_frozen = false;
struct cgroup_subsys_state *new_css;
mutex_lock(&freezer_mutex);
......@@ -174,22 +172,21 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, new_css, tset) {
struct freezer *freezer = css_freezer(new_css);
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = true;
/* clear FROZEN and propagate upwards */
while (freezer && (freezer->state & CGROUP_FROZEN)) {
freezer->state &= ~CGROUP_FROZEN;
freezer = parent_freezer(freezer);
}
}
}
/* propagate FROZEN clearing upwards */
while (clear_frozen && (freezer = parent_freezer(freezer))) {
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = freezer->state & CGROUP_FREEZING;
}
mutex_unlock(&freezer_mutex);
}
......
......@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
for (p = pids; p; p = parent_pids(p))
for (p = pids; parent_pids(p); p = parent_pids(p))
pids_cancel(p, num);
}
......@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
for (p = pids; p; p = parent_pids(p))
for (p = pids; parent_pids(p); p = parent_pids(p))
atomic64_add(num, &p->counter);
}
......@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p, *q;
for (p = pids; p; p = parent_pids(p)) {
for (p = pids; parent_pids(p); p = parent_pids(p)) {
int64_t new = atomic64_add_return(num, &p->counter);
/*
......@@ -162,13 +162,13 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
return -EAGAIN;
}
static int pids_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int pids_can_attach(struct cgroup_taskset *tset)
{
struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
struct cgroup_subsys_state *dst_css;
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, dst_css, tset) {
struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
......@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
return 0;
}
static void pids_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void pids_cancel_attach(struct cgroup_taskset *tset)
{
struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
struct cgroup_subsys_state *dst_css;
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, dst_css, tset) {
struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
......@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
}
}
/*
* task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
* on threadgroup_change_begin() held by the copy_process().
*/
static int pids_can_fork(struct task_struct *task, void **priv_p)
{
struct cgroup_subsys_state *css;
struct pids_cgroup *pids;
int err;
/*
* Use the "current" task_css for the pids subsystem as the tentative
* css. It is possible we will charge the wrong hierarchy, in which
* case we will forcefully revert/reapply the charge on the right
* hierarchy after it is committed to the task proper.
*/
css = task_get_css(current, pids_cgrp_id);
css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
err = pids_try_charge(pids, 1);
if (err)
goto err_css_put;
*priv_p = css;
return 0;
err_css_put:
css_put(css);
return err;
return pids_try_charge(pids, 1);
}
static void pids_cancel_fork(struct task_struct *task, void *priv)
{
struct cgroup_subsys_state *css = priv;
struct pids_cgroup *pids = css_pids(css);
pids_uncharge(pids, 1);
css_put(css);
}
static void pids_fork(struct task_struct *task, void *priv)
{
struct cgroup_subsys_state *css;
struct cgroup_subsys_state *old_css = priv;
struct pids_cgroup *pids;
struct pids_cgroup *old_pids = css_pids(old_css);
css = task_get_css(task, pids_cgrp_id);
css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
/*
* If the association has changed, we have to revert and reapply the
* charge/uncharge on the wrong hierarchy to the current one. Since
* the association can only change due to an organisation event, its
* okay for us to ignore the limit in this case.
*/
if (pids != old_pids) {
pids_uncharge(old_pids, 1);
pids_charge(pids, 1);
}
css_put(css);
css_put(old_css);
pids_uncharge(pids, 1);
}
static void pids_free(struct task_struct *task)
......@@ -335,6 +298,7 @@ static struct cftype pids_files[] = {
{
.name = "current",
.read_s64 = pids_current_read,
.flags = CFTYPE_NOT_ON_ROOT,
},
{ } /* terminate */
};
......@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
.cancel_attach = pids_cancel_attach,
.can_fork = pids_can_fork,
.cancel_fork = pids_cancel_fork,
.fork = pids_fork,
.free = pids_free,
.legacy_cftypes = pids_files,
.dfl_cftypes = pids_files,
......
......@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
static int cpuset_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
struct cpuset *cs = css_cs(css);
struct cgroup_subsys_state *css;
struct cpuset *cs;
struct task_struct *task;
int ret;
/* used later by cpuset_attach() */
cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
......@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, css, tset) {
ret = task_can_attach(task, cs->cpus_allowed);
if (ret)
goto out_unlock;
......@@ -1467,9 +1468,14 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
return ret;
}
static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct cpuset *cs;
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
css_cs(css)->attach_in_progress--;
mutex_unlock(&cpuset_mutex);
......@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
*/
static cpumask_var_t cpus_attach;
static void cpuset_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void cpuset_attach(struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cpuset *cs = css_cs(css);
struct cgroup_subsys_state *css;
struct cpuset *cs;
struct cpuset *oldcs = cpuset_attach_old_cs;
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
/* prepare for attach */
......@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, css, tset) {
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
......@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
* sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
cgroup_taskset_for_each_leader(leader, tset) {
cgroup_taskset_for_each_leader(leader, css, tset) {
struct mm_struct *mm = get_task_mm(leader);
if (mm) {
......
......@@ -9494,12 +9494,12 @@ static int __perf_cgroup_move(void *info)
return 0;
}
static void perf_cgroup_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void perf_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
cgroup_taskset_for_each(task, tset)
cgroup_taskset_for_each(task, css, tset)
task_function_call(task, __perf_cgroup_move, task);
}
......
......@@ -1368,8 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
if (clone_flags & CLONE_THREAD)
threadgroup_change_begin(current);
threadgroup_change_begin(current);
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
......@@ -1610,8 +1609,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
proc_fork_connector(p);
cgroup_post_fork(p, cgrp_ss_priv);
if (clone_flags & CLONE_THREAD)
threadgroup_change_end(current);
threadgroup_change_end(current);
perf_event_fork(p);
trace_task_newtask(p, clone_flags);
......@@ -1652,8 +1650,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
if (clone_flags & CLONE_THREAD)
threadgroup_change_end(current);
threadgroup_change_end(current);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
......
......@@ -8241,12 +8241,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
sched_move_task(task);
}
static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
cgroup_taskset_for_each(task, tset) {
cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
......@@ -8259,12 +8259,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
return 0;
}
static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void cpu_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
cgroup_taskset_for_each(task, tset)
cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
}
......
......@@ -4779,23 +4779,18 @@ static void mem_cgroup_clear_mc(void)
spin_unlock(&mc.lock);
}
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct cgroup_subsys_state *css;
struct mem_cgroup *memcg;
struct mem_cgroup *from;
struct task_struct *leader, *p;
struct mm_struct *mm;
unsigned long move_flags;
int ret = 0;
/*
* We are now commited to this value whatever it is. Changes in this
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
if (!move_flags)
/* charge immigration isn't supported on the default hierarchy */
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
/*
......@@ -4805,13 +4800,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
* multiple.
*/
p = NULL;
cgroup_taskset_for_each_leader(leader, tset) {
cgroup_taskset_for_each_leader(leader, css, tset) {
WARN_ON_ONCE(p);
p = leader;
memcg = mem_cgroup_from_css(css);
}
if (!p)
return 0;
/*
* We are now commited to this value whatever it is. Changes in this
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
if (!move_flags)
return 0;
from = mem_cgroup_from_task(p);
VM_BUG_ON(from == memcg);
......@@ -4842,8 +4847,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
return ret;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
if (mc.to)
mem_cgroup_clear_mc();
......@@ -4985,10 +4989,10 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_dec(&mc.from->moving_account);
}
static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
{
struct task_struct *p = cgroup_taskset_first(tset);
struct cgroup_subsys_state *css;
struct task_struct *p = cgroup_taskset_first(tset, &css);
struct mm_struct *mm = get_task_mm(p);
if (mm) {
......@@ -5000,17 +5004,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
return 0;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_move_task(struct cgroup_taskset *tset)
{
}
#endif
......
......@@ -81,9 +81,11 @@ static void update_classid(struct cgroup_subsys_state *css, void *v)
css_task_iter_end(&it);
}
static void cgrp_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
cgroup_taskset_first(tset, &css);
update_classid(css,
(void *)(unsigned long)css_cls_state(css)->classid);
}
......
......@@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
static void net_prio_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void net_prio_attach(struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v = (void *)(unsigned long)css->cgroup->id;
struct cgroup_subsys_state *css;
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->cgroup->id;
cgroup_taskset_for_each(p, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册