Commit 3454b0b9 authored by Jiufei Xue, committed by Joseph Qi

writeback: add memcg_blkcg_link tree

Add a global radix tree that links a memcg to the blkcg the user
attaches tasks to alongside it under cgroup v1. The writeback cgroup
code uses this link to find the blkcg that corresponds to a given
memcg.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Parent 6d3b382d
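
The policy the tree implements is easiest to see in isolation: each memcg css id maps to at most one blkcg, the first memcg/blkcg pairing wins, a memcg later seen with a different blkcg has its link collapsed to the root blkcg, and lookups for unlinked memcgs fall back to the root blkcg. Below is a minimal userspace sketch of just that policy, not kernel code: a flat array stands in for the radix tree, small integers stand in for css pointers, and all names are illustrative.

/*
 * Minimal userspace sketch of the link policy only -- not kernel code.
 * link_table stands in for memcg_blkcg_tree; integer ids stand in for
 * memcg/blkcg css pointers. All names here are illustrative.
 */
#include <stdio.h>

#define MAX_MEMCG	64
#define ROOT_BLKCG	0	/* stands in for blkcg_root_css */
#define NO_LINK		-1

static int link_table[MAX_MEMCG];	/* memcg id -> linked blkcg id */

/* mirrors insert_memcg_blkcg_link(): first pair wins, a conflict collapses to root */
static void link_insert(int memcg_id, int blkcg_id)
{
	int cur = link_table[memcg_id];

	if (cur == blkcg_id || cur == ROOT_BLKCG)
		return;			/* already linked, nothing to do */
	link_table[memcg_id] = (cur == NO_LINK) ? blkcg_id : ROOT_BLKCG;
}

/* mirrors find_blkcg_css(): an unlinked memcg falls back to the root blkcg */
static int link_lookup(int memcg_id)
{
	int cur = link_table[memcg_id];

	return (cur == NO_LINK) ? ROOT_BLKCG : cur;
}

int main(void)
{
	for (int i = 0; i < MAX_MEMCG; i++)
		link_table[i] = NO_LINK;

	link_insert(3, 7);	/* task attached to memcg 3 and blkcg 7 */
	printf("memcg 3 -> blkcg %d\n", link_lookup(3));	/* 7 */

	link_insert(3, 9);	/* same memcg, different blkcg: collapse */
	printf("memcg 3 -> blkcg %d\n", link_lookup(3));	/* 0, the root */

	printf("memcg 5 -> blkcg %d\n", link_lookup(5));	/* 0, never linked */
	return 0;
}

In the patch itself the same policy runs under memcg_blkcg_tree_lock with RCU-deferred frees, and the writeback code consults the tree only when the memory controller is not on the unified (v2) hierarchy.
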
@@ -248,9 +248,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 
-	return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
-		cgroup_subsys_on_dfl(io_cgrp_subsys) &&
-		bdi_cap_account_dirty(bdi) &&
+	return bdi_cap_account_dirty(bdi) &&
 		(bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
 		(inode->i_sb->s_iflags & SB_I_CGROUPWB);
 }
@@ -394,6 +392,13 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode,
 	rcu_read_unlock();
 }
 
+void insert_memcg_blkcg_link(struct cgroup_subsys *ss,
+			     struct list_head *tmp_links,
+			     struct css_set *cset);
+int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links);
+void free_memcg_blkcg_links(struct list_head *links_to_free);
+void delete_memcg_blkcg_link(struct cgroup_subsys *ss,
+			     struct cgroup_subsys_state *css);
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline bool inode_cgwb_enabled(struct inode *inode)
@@ -459,6 +464,26 @@ static inline int inode_congested(struct inode *inode, int cong_bits)
 	return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
 }
 
+static inline void insert_memcg_blkcg_link(struct cgroup_subsys *ss,
+					   struct list_head *tmp_links,
+					   struct css_set *cset)
+{
+}
+
+static inline int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links)
+{
+	return 0;
+}
+
+static inline void free_memcg_blkcg_links(struct list_head *links_to_free)
+{
+}
+
+static inline void delete_memcg_blkcg_link(struct cgroup_subsys *ss,
+					   struct cgroup_subsys_state *css)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline int inode_read_congested(struct inode *inode)
@@ -59,6 +59,7 @@ struct cgroup_taskset {
 	/* the src and dst cset list running through cset->mg_node */
 	struct list_head	src_csets;
 	struct list_head	dst_csets;
+	int			dst_count;
 
 	/* the number of tasks in the set */
 	int			nr_tasks;
@@ -103,6 +104,7 @@ struct cgroup_mgctx {
 	.src_csets = LIST_HEAD_INIT(tset.src_csets),		\
 	.dst_csets = LIST_HEAD_INIT(tset.dst_csets),		\
 	.csets = &tset.src_csets,				\
+	.dst_count = 0,						\
 }
 
 #define CGROUP_MGCTX_INIT(name)					\
@@ -55,6 +55,7 @@
 #include <linux/nsproxy.h>
 #include <linux/file.h>
 #include <linux/sched/cputime.h>
+#include <linux/backing-dev.h>
 #include <net/sock.h>
 
 #define CREATE_TRACE_POINTS
@@ -2214,9 +2215,11 @@ static void cgroup_migrate_add_task(struct task_struct *task,
 	if (list_empty(&cset->mg_node))
 		list_add_tail(&cset->mg_node,
 			      &mgctx->tset.src_csets);
-	if (list_empty(&cset->mg_dst_cset->mg_node))
+	if (list_empty(&cset->mg_dst_cset->mg_node)) {
 		list_add_tail(&cset->mg_dst_cset->mg_node,
 			      &mgctx->tset.dst_csets);
+		mgctx->tset.dst_count++;
+	}
 }
 
 /**
@@ -2297,9 +2300,14 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
 	struct task_struct *task, *tmp_task;
 	struct css_set *cset, *tmp_cset;
 	int ssid, failed_ssid, ret;
+	LIST_HEAD(tmp_links);
 
 	/* check that we can legitimately attach to the cgroup */
 	if (tset->nr_tasks) {
+		ret = allocate_memcg_blkcg_links(tset->dst_count * 2, &tmp_links);
+		if (ret)
+			goto out_free_list;
+
 		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
 			if (ss->can_attach) {
 				tset->ssid = ssid;
@@ -2345,6 +2353,8 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
 				tset->ssid = ssid;
 				ss->attach(tset);
 			}
+			list_for_each_entry(cset, &tset->dst_csets, mg_node)
+				insert_memcg_blkcg_link(ss, &tmp_links, cset);
 		} while_each_subsys_mask();
 	}
 
@@ -2371,6 +2381,9 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
 	}
 	spin_unlock_irq(&css_set_lock);
 
+out_free_list:
+	free_memcg_blkcg_links(&tmp_links);
+
 	/*
 	 * Re-initialize the cgroup_taskset structure in case it is reused
 	 * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
@@ -4593,6 +4606,8 @@ static void css_free_rwork_fn(struct work_struct *work)
 		struct cgroup_subsys_state *parent = css->parent;
 		int id = css->id;
 
+		delete_memcg_blkcg_link(ss, css);
+
 		ss->css_free(css);
 		cgroup_idr_remove(&ss->css_idr, id);
 		cgroup_put(cgrp);
@@ -393,6 +393,158 @@ static void wb_exit(struct bdi_writeback *wb)
 
 #include <linux/memcontrol.h>
 
+struct memcg_blkcg_link {
+	struct list_head list;
+	struct rcu_head rcu;
+	struct cgroup_subsys_state *memcg_css;
+	struct cgroup_subsys_state *blkcg_css;
+};
+
+static RADIX_TREE(memcg_blkcg_tree, GFP_ATOMIC);
+DEFINE_SPINLOCK(memcg_blkcg_tree_lock);
+
+int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links)
+{
+	struct memcg_blkcg_link *link;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		link = kzalloc(sizeof(*link), GFP_KERNEL);
+		if (!link) {
+			free_memcg_blkcg_links(tmp_links);
+			return -ENOMEM;
+		}
+		list_add(&link->list, tmp_links);
+	}
+	return 0;
+}
+
+static void link_free(struct rcu_head *head)
+{
+	struct memcg_blkcg_link *link = container_of(head,
+					struct memcg_blkcg_link, rcu);
+	kfree(link);
+}
+
+void insert_memcg_blkcg_link(struct cgroup_subsys *ss,
+			     struct list_head *tmp_links,
+			     struct css_set *cset)
+{
+	struct memcg_blkcg_link *link;
+	struct cgroup_subsys_state *blkcg_css;
+	struct cgroup_subsys_state *memcg_css;
+	int err;
+
+	if (ss->id != io_cgrp_id && ss->id != memory_cgrp_id)
+		return;
+
+	BUG_ON(list_empty(tmp_links));
+
+	memcg_css = cset->subsys[memory_cgrp_id];
+	blkcg_css = cset->subsys[io_cgrp_id];
+
+	if ((memcg_css == &root_mem_cgroup->css) ||
+	    (blkcg_css == blkcg_root_css))
+		return;
+
+	rcu_read_lock();
+	link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id);
+	if (link && ((link->blkcg_css == blkcg_css) ||
+		     (link->blkcg_css == blkcg_root_css))) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	spin_lock(&memcg_blkcg_tree_lock);
+	if (link) {
+		radix_tree_delete(&memcg_blkcg_tree, memcg_css->id);
+		call_rcu(&link->rcu, link_free);
+		blkcg_css = blkcg_root_css;
+	}
+
+	link = list_first_entry(tmp_links, struct memcg_blkcg_link, list);
+	list_del_init(&link->list);
+
+	link->memcg_css = memcg_css;
+	link->blkcg_css = blkcg_css;
+	err = radix_tree_insert(&memcg_blkcg_tree, memcg_css->id, link);
+	WARN_ON(err);
+	spin_unlock(&memcg_blkcg_tree_lock);
+}
+
+void free_memcg_blkcg_links(struct list_head *links_to_free)
+{
+	struct memcg_blkcg_link *link, *tmp_link;
+
+	list_for_each_entry_safe(link, tmp_link, links_to_free, list) {
+		list_del(&link->list);
+		kfree(link);
+	}
+}
+
+static void delete_memcg_link(struct cgroup_subsys_state *memcg_css)
+{
+	struct memcg_blkcg_link *link;
+
+	spin_lock(&memcg_blkcg_tree_lock);
+	link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id);
+	if (link) {
+		radix_tree_delete(&memcg_blkcg_tree, memcg_css->id);
+		call_rcu(&link->rcu, link_free);
+	}
+	spin_unlock(&memcg_blkcg_tree_lock);
+}
+
+static void delete_blkcg_link(struct cgroup_subsys_state *blkcg_css)
+{
+	struct memcg_blkcg_link *link;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	spin_lock(&memcg_blkcg_tree_lock);
+	radix_tree_for_each_slot(slot, &memcg_blkcg_tree, &iter, 0) {
+		link = *slot;
+		if (link->blkcg_css == blkcg_css) {
+			radix_tree_delete(&memcg_blkcg_tree, link->memcg_css->id);
+			call_rcu(&link->rcu, link_free);
+		}
+	}
+	spin_unlock(&memcg_blkcg_tree_lock);
+}
+
+void delete_memcg_blkcg_link(struct cgroup_subsys *ss,
+			     struct cgroup_subsys_state *css)
+{
+	if (ss->id != io_cgrp_id && ss->id != memory_cgrp_id)
+		return;
+
+	if (ss->id == io_cgrp_id)
+		delete_blkcg_link(css);
+	if (ss->id == memory_cgrp_id)
+		delete_memcg_link(css);
+}
+
+static struct cgroup_subsys_state *find_blkcg_css(struct cgroup_subsys_state *memcg_css)
+{
+	struct memcg_blkcg_link *link;
+	struct cgroup_subsys_state *blkcg_css;
+
+	rcu_read_lock();
+	link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id);
+	if (link)
+		blkcg_css = link->blkcg_css;
+	else
+		blkcg_css = blkcg_root_css;
+	css_get(blkcg_css);
+	rcu_read_unlock();
+
+	return blkcg_css;
+}
+
 /*
  * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
  * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
@@ -545,7 +697,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	int ret = 0;
 
 	memcg = mem_cgroup_from_css(memcg_css);
-	blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
+	else
+		blkcg_css = find_blkcg_css(memcg_css);
 	blkcg = css_to_blkcg(blkcg_css);
 	memcg_cgwb_list = &memcg->cgwb_list;
 	blkcg_cgwb_list = &blkcg->cgwb_list;
@@ -669,8 +824,11 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 		struct cgroup_subsys_state *blkcg_css;
 
 		/* see whether the blkcg association has changed */
-		blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
-					     &io_cgrp_subsys);
+		if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
+						     &io_cgrp_subsys);
+		else
+			blkcg_css = find_blkcg_css(memcg_css);
 		if (unlikely(wb->blkcg_css != blkcg_css ||
 			     !wb_tryget(wb)))
 			wb = NULL;
@@ -468,7 +468,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
 
 	memcg = page->mem_cgroup;
 
-	if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+	if (!memcg)
 		memcg = root_mem_cgroup;
 
 	return &memcg->css;