From a79cf2e16588ae837209725a450384cc8b2e3b22 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Wed, 6 Dec 2017 17:40:35 +0800 Subject: [PATCH] writeback: add memcg_blkcg_link tree Here we add a global radix tree to link memcg and blkcg that the user attach the tasks to when using cgroup v1, which is used for writeback cgroup. Signed-off-by: Jiufei Xue Reviewed-by: Joseph Qi --- include/linux/backing-dev.h | 31 +++++- kernel/cgroup/cgroup-internal.h | 2 + kernel/cgroup/cgroup.c | 17 +++- mm/backing-dev.c | 164 +++++++++++++++++++++++++++++++- mm/memcontrol.c | 2 +- 5 files changed, 208 insertions(+), 8 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c28a47cbe355..d4088485acab 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -248,9 +248,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode); - return cgroup_subsys_on_dfl(memory_cgrp_subsys) && - cgroup_subsys_on_dfl(io_cgrp_subsys) && - bdi_cap_account_dirty(bdi) && + return bdi_cap_account_dirty(bdi) && (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } @@ -394,6 +392,13 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, rcu_read_unlock(); } +void insert_memcg_blkcg_link(struct cgroup_subsys *ss, + struct list_head *tmp_links, + struct css_set *cset); +int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links); +void free_memcg_blkcg_links(struct list_head *links_to_free); +void delete_memcg_blkcg_link(struct cgroup_subsys *ss, + struct cgroup_subsys_state *css); #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) @@ -459,6 +464,26 @@ static inline int inode_congested(struct inode *inode, int cong_bits) return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); } +static inline void insert_memcg_blkcg_link(struct cgroup_subsys *ss, + struct list_head *tmp_links, + struct css_set *cset) +{ +} + +static inline int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links) +{ + return 0; +} + +static inline void free_memcg_blkcg_links(struct list_head *links_to_free) +{ +} + +static inline void delete_memcg_blkcg_link(struct cgroup_subsys *ss, + struct cgroup_subsys_state *css) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ static inline int inode_read_congested(struct inode *inode) diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 75568fcf2180..9316004d4b36 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -59,6 +59,7 @@ struct cgroup_taskset { /* the src and dst cset list running through cset->mg_node */ struct list_head src_csets; struct list_head dst_csets; + int dst_count; /* the number of tasks in the set */ int nr_tasks; @@ -103,6 +104,7 @@ struct cgroup_mgctx { .src_csets = LIST_HEAD_INIT(tset.src_csets), \ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ .csets = &tset.src_csets, \ + .dst_count = 0, \ } #define CGROUP_MGCTX_INIT(name) \ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 63dae7e0ccae..435ded55278e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -2214,9 +2215,11 @@ static void cgroup_migrate_add_task(struct task_struct *task, if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, &mgctx->tset.src_csets); - if (list_empty(&cset->mg_dst_cset->mg_node)) + if (list_empty(&cset->mg_dst_cset->mg_node)) { list_add_tail(&cset->mg_dst_cset->mg_node, &mgctx->tset.dst_csets); + mgctx->tset.dst_count++; + } } /** @@ -2297,9 +2300,14 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) struct task_struct *task, *tmp_task; struct css_set *cset, *tmp_cset; int ssid, failed_ssid, ret; + LIST_HEAD(tmp_links); /* check that we can legitimately attach to the cgroup */ if (tset->nr_tasks) { + ret = allocate_memcg_blkcg_links(tset->dst_count*2, &tmp_links); + if (ret) + goto out_free_list; + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ss->can_attach) { tset->ssid = ssid; @@ -2345,6 +2353,8 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) tset->ssid = ssid; ss->attach(tset); } + list_for_each_entry(cset, &tset->dst_csets, mg_node) + insert_memcg_blkcg_link(ss, &tmp_links, cset); } while_each_subsys_mask(); } @@ -2371,6 +2381,9 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) } spin_unlock_irq(&css_set_lock); +out_free_list: + free_memcg_blkcg_links(&tmp_links); + /* * Re-initialize the cgroup_taskset structure in case it is reused * again in another cgroup_migrate_add_task()/cgroup_migrate_execute() @@ -4593,6 +4606,8 @@ static void css_free_rwork_fn(struct work_struct *work) struct cgroup_subsys_state *parent = css->parent; int id = css->id; + delete_memcg_blkcg_link(ss, css); + ss->css_free(css); cgroup_idr_remove(&ss->css_idr, id); cgroup_put(cgrp); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 72e6d0c55cfa..a28fc475fde1 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -393,6 +393,158 @@ static void wb_exit(struct bdi_writeback *wb) #include +struct memcg_blkcg_link { + struct list_head list; + struct rcu_head rcu; + struct cgroup_subsys_state *memcg_css; + struct cgroup_subsys_state *blkcg_css; +}; + +static RADIX_TREE(memcg_blkcg_tree, GFP_ATOMIC); +DEFINE_SPINLOCK(memcg_blkcg_tree_lock); + +int allocate_memcg_blkcg_links(int count, struct list_head *tmp_links) +{ + struct memcg_blkcg_link *link; + int i; + + for (i = 0; i < count; i++) { + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + free_memcg_blkcg_links(tmp_links); + return -ENOMEM; + } + list_add(&link->list, tmp_links); + } + return 0; +} + +static void link_free(struct rcu_head *head) +{ + struct memcg_blkcg_link *link = container_of(head, + struct memcg_blkcg_link, rcu); + kfree(link); +} + +void insert_memcg_blkcg_link(struct cgroup_subsys *ss, + struct list_head *tmp_links, + struct css_set *cset) +{ + struct memcg_blkcg_link *link; + struct cgroup_subsys_state *blkcg_css; + struct cgroup_subsys_state *memcg_css; + int err; + + if (ss->id != io_cgrp_id && ss->id != memory_cgrp_id) + return; + + BUG_ON(list_empty(tmp_links)); + + memcg_css = cset->subsys[memory_cgrp_id]; + blkcg_css = cset->subsys[io_cgrp_id]; + + if ((memcg_css == &root_mem_cgroup->css) || + (blkcg_css == blkcg_root_css)) + return; + + rcu_read_lock(); + link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id); + if (link && ((link->blkcg_css == blkcg_css) || + (link->blkcg_css == blkcg_root_css))) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + spin_lock(&memcg_blkcg_tree_lock); + if (link) { + radix_tree_delete(&memcg_blkcg_tree, memcg_css->id); + call_rcu(&link->rcu, link_free); + blkcg_css = blkcg_root_css; + } + + link = list_first_entry(tmp_links, struct memcg_blkcg_link, list); + list_del_init(&link->list); + + link->memcg_css = memcg_css; + link->blkcg_css = blkcg_css; + err = radix_tree_insert(&memcg_blkcg_tree, memcg_css->id, link); + WARN_ON(err); + + spin_unlock(&memcg_blkcg_tree_lock); +} + +void free_memcg_blkcg_links(struct list_head *links_to_free) +{ + struct memcg_blkcg_link *link, *tmp_link; + + list_for_each_entry_safe(link, tmp_link, links_to_free, list) { + list_del(&link->list); + kfree(link); + } +} + +static void delete_memcg_link(struct cgroup_subsys_state *memcg_css) +{ + struct memcg_blkcg_link *link; + + spin_lock(&memcg_blkcg_tree_lock); + link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id); + if (link) { + radix_tree_delete(&memcg_blkcg_tree, memcg_css->id); + call_rcu(&link->rcu, link_free); + } + spin_unlock(&memcg_blkcg_tree_lock); +} + +static void delete_blkcg_link(struct cgroup_subsys_state *blkcg_css) +{ + struct memcg_blkcg_link *link; + struct radix_tree_iter iter; + void **slot; + + spin_lock(&memcg_blkcg_tree_lock); + radix_tree_for_each_slot(slot, &memcg_blkcg_tree, &iter, 0) { + link = *slot; + if (link->blkcg_css == blkcg_css) { + radix_tree_delete(&memcg_blkcg_tree, link->memcg_css->id); + call_rcu(&link->rcu, link_free); + } + } + spin_unlock(&memcg_blkcg_tree_lock); +} + +void delete_memcg_blkcg_link(struct cgroup_subsys *ss, + struct cgroup_subsys_state *css) +{ + if (ss->id != io_cgrp_id && ss->id != memory_cgrp_id) + return; + + if (ss->id == io_cgrp_id) + delete_blkcg_link(css); + if (ss->id == memory_cgrp_id) + delete_memcg_link(css); +} + +static struct cgroup_subsys_state *find_blkcg_css(struct cgroup_subsys_state *memcg_css) +{ + struct memcg_blkcg_link *link; + struct cgroup_subsys_state *blkcg_css; + + rcu_read_lock(); + link = radix_tree_lookup(&memcg_blkcg_tree, memcg_css->id); + if (link) + blkcg_css = link->blkcg_css; + else + blkcg_css = blkcg_root_css; + + css_get(blkcg_css); + + rcu_read_unlock(); + + return blkcg_css; +} + /* * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree, * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU @@ -545,7 +697,10 @@ static int cgwb_create(struct backing_dev_info *bdi, int ret = 0; memcg = mem_cgroup_from_css(memcg_css); - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + else + blkcg_css = find_blkcg_css(memcg_css); blkcg = css_to_blkcg(blkcg_css); memcg_cgwb_list = &memcg->cgwb_list; blkcg_cgwb_list = &blkcg->cgwb_list; @@ -669,8 +824,11 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *blkcg_css; /* see whether the blkcg association has changed */ - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, - &io_cgrp_subsys); + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, + &io_cgrp_subsys); + else + blkcg_css = find_blkcg_css(memcg_css); if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb))) wb = NULL; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7e7cc0cd89fe..b1cdb2657161 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -468,7 +468,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page) memcg = page->mem_cgroup; - if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (!memcg) memcg = root_mem_cgroup; return &memcg->css; -- GitLab