提交 f3b6a6df 编写于 作者: R Roman Gushchin 提交者: Linus Torvalds

writeback, cgroup: keep list of inodes attached to bdi_writeback

Currently there is no way to iterate over inodes attached to a specific
cgwb structure.  It limits the ability to efficiently reclaim the
writeback structure itself and associated memory and block cgroup
structures without scanning all inodes belonging to a sb, which can be
prohibitively expensive.

While dirty/in-active-writeback an inode belongs to one of the
bdi_writeback's io lists: b_dirty, b_io, b_more_io and b_dirty_time.  Once
cleaned up, it's removed from all io lists.  So the inode->i_io_list can
be reused to maintain the list of inodes, attached to a bdi_writeback
structure.

This patch introduces a new wb->b_attached list, which contains all inodes
which were dirty at least once and are attached to the given cgwb.  Inodes
attached to the root bdi_writeback structures are never placed on such
list.  The following patch will use this list to try to release cgwbs
structures more efficiently.

Link: https://lkml.kernel.org/r/20210608230225.2078447-6-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 29264d92
...@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode, ...@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode,
return false; return false;
} }
/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 *
 * The caller must hold both @wb->list_lock and @inode->i_lock, as checked
 * by the assertions below.
 */
static void inode_io_list_del_locked(struct inode *inode,
				     struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	/* No longer queued for sync once off the io lists. */
	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}
static void wb_wakeup(struct bdi_writeback *wb) static void wb_wakeup(struct bdi_writeback *wb)
{ {
spin_lock_bh(&wb->work_lock); spin_lock_bh(&wb->work_lock);
...@@ -278,6 +259,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page) ...@@ -278,6 +259,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
} }
EXPORT_SYMBOL_GPL(__inode_attach_wb); EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
 * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
 * @inode: inode of interest with i_lock held
 * @wb: target bdi_writeback
 *
 * Remove the inode from wb's io lists and if necessary put onto the
 * b_attached list.  Only inodes attached to cgwb's are kept on this list.
 *
 * The caller must hold both @wb->list_lock and @inode->i_lock.
 */
static void inode_cgwb_move_to_attached(struct inode *inode,
					struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	/* The inode is clean now; it is no longer queued for writeback. */
	inode->i_state &= ~I_SYNC_QUEUED;
	/*
	 * Inodes attached to the root bdi_writeback (wb == &wb->bdi->wb)
	 * are never tracked on b_attached; they are simply removed from
	 * the io lists.  Only cgwb-attached inodes are kept around so the
	 * cgwb can later be released efficiently.
	 */
	if (wb != &wb->bdi->wb)
		list_move(&inode->i_io_list, &wb->b_attached);
	else
		list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}
/** /**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
* @inode: inode of interest with i_lock held * @inode: inode of interest with i_lock held
...@@ -418,21 +421,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) ...@@ -418,21 +421,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
wb_get(new_wb); wb_get(new_wb);
/* /*
* Transfer to @new_wb's IO list if necessary. The specific list * Transfer to @new_wb's IO list if necessary. If the @inode is dirty,
* @inode was on is ignored and the inode is put on ->b_dirty which * the specific list @inode was on is ignored and the @inode is put on
* is always correct including from ->b_dirty_time. The transfer * ->b_dirty which is always correct including from ->b_dirty_time.
* preserves @inode->dirtied_when ordering. * The transfer preserves @inode->dirtied_when ordering. If the @inode
* was clean, it means it was on the b_attached list, so move it onto
* the b_attached list of @new_wb.
*/ */
if (!list_empty(&inode->i_io_list)) { if (!list_empty(&inode->i_io_list)) {
struct inode *pos;
inode_io_list_del_locked(inode, old_wb);
inode->i_wb = new_wb; inode->i_wb = new_wb;
list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
if (time_after_eq(inode->dirtied_when, if (inode->i_state & I_DIRTY_ALL) {
pos->dirtied_when)) struct inode *pos;
break;
inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
if (time_after_eq(inode->dirtied_when,
pos->dirtied_when))
break;
inode_io_list_move_locked(inode, new_wb,
pos->i_io_list.prev);
} else {
inode_cgwb_move_to_attached(inode, new_wb);
}
} else { } else {
inode->i_wb = new_wb; inode->i_wb = new_wb;
} }
...@@ -1021,6 +1031,17 @@ fs_initcall(cgroup_writeback_init); ...@@ -1021,6 +1031,17 @@ fs_initcall(cgroup_writeback_init);
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
/*
 * Fallback variant of inode_cgwb_move_to_attached(): there is no per-cgwb
 * b_attached list here, so a clean inode is simply removed from the wb's
 * io lists.  NOTE(review): presumably compiled under !CONFIG_CGROUP_WRITEBACK
 * — the enclosing #ifdef is outside this hunk; confirm against the full file.
 */
static void inode_cgwb_move_to_attached(struct inode *inode,
					struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	/* The inode is clean now; it is no longer queued for writeback. */
	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}
static struct bdi_writeback * static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode) locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock) __releases(&inode->i_lock)
...@@ -1121,7 +1142,11 @@ void inode_io_list_del(struct inode *inode) ...@@ -1121,7 +1142,11 @@ void inode_io_list_del(struct inode *inode)
wb = inode_to_wb_and_lock_list(inode); wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
inode_io_list_del_locked(inode, wb);
inode->i_state &= ~I_SYNC_QUEUED;
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
} }
...@@ -1434,7 +1459,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -1434,7 +1459,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
inode->i_state &= ~I_SYNC_QUEUED; inode->i_state &= ~I_SYNC_QUEUED;
} else { } else {
/* The inode is clean. Remove from writeback lists. */ /* The inode is clean. Remove from writeback lists. */
inode_io_list_del_locked(inode, wb); inode_cgwb_move_to_attached(inode, wb);
} }
} }
...@@ -1586,7 +1611,7 @@ static int writeback_single_inode(struct inode *inode, ...@@ -1586,7 +1611,7 @@ static int writeback_single_inode(struct inode *inode,
* responsible for the writeback lists. * responsible for the writeback lists.
*/ */
if (!(inode->i_state & I_DIRTY_ALL)) if (!(inode->i_state & I_DIRTY_ALL))
inode_io_list_del_locked(inode, wb); inode_cgwb_move_to_attached(inode, wb);
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
inode_sync_complete(inode); inode_sync_complete(inode);
out: out:
......
...@@ -154,6 +154,7 @@ struct bdi_writeback { ...@@ -154,6 +154,7 @@ struct bdi_writeback {
struct cgroup_subsys_state *blkcg_css; /* and blkcg */ struct cgroup_subsys_state *blkcg_css; /* and blkcg */
struct list_head memcg_node; /* anchored at memcg->cgwb_list */ struct list_head memcg_node; /* anchored at memcg->cgwb_list */
struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */ struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
struct list_head b_attached; /* attached inodes, protected by list_lock */
union { union {
struct work_struct release_work; struct work_struct release_work;
......
...@@ -396,6 +396,7 @@ static void cgwb_release_workfn(struct work_struct *work) ...@@ -396,6 +396,7 @@ static void cgwb_release_workfn(struct work_struct *work)
fprop_local_destroy_percpu(&wb->memcg_completions); fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt); percpu_ref_exit(&wb->refcnt);
wb_exit(wb); wb_exit(wb);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu); kfree_rcu(wb, rcu);
} }
...@@ -472,6 +473,7 @@ static int cgwb_create(struct backing_dev_info *bdi, ...@@ -472,6 +473,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
wb->memcg_css = memcg_css; wb->memcg_css = memcg_css;
wb->blkcg_css = blkcg_css; wb->blkcg_css = blkcg_css;
INIT_LIST_HEAD(&wb->b_attached);
INIT_WORK(&wb->release_work, cgwb_release_workfn); INIT_WORK(&wb->release_work, cgwb_release_workfn);
set_bit(WB_registered, &wb->state); set_bit(WB_registered, &wb->state);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册