提交 65d0f205 编写于 作者: D Dave Chinner 提交者: Alex Elder

xfs: split inode AG walking into separate code for reclaim

The reclaim walk requires different locking and has a slightly
different walk algorithm, so separate it out so that it can be
optimised separately.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
上级 69d6cc76
...@@ -40,78 +40,46 @@ ...@@ -40,78 +40,46 @@
#include <linux/freezer.h> #include <linux/freezer.h>
/*
 * Find the next inode in @pag's in-core inode radix tree, starting the
 * search at *@first_index.  When @tag is XFS_ICI_NO_TAG every inode is a
 * candidate; otherwise only inodes with @tag set are considered.
 *
 * On a hit, *@first_index is moved to the AG-relative inode number that
 * follows the inode found, ready for the next call.
 *
 * Returns the inode found, or NULL if the lookup found nothing.  NULL is
 * also returned when advancing the index wraps around; note that in this
 * case the inode located by this call is not handed back to the caller.
 */
STATIC xfs_inode_t *
xfs_inode_ag_lookup(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	uint32_t		*first_index,
	int			tag)
{
	struct xfs_inode	*found_ip;
	int			count;

	/*
	 * The tree is sparse, so request a single entry from a gang lookup
	 * and let it walk forward until it has found that many objects.
	 */
	count = (tag == XFS_ICI_NO_TAG) ?
		radix_tree_gang_lookup(&pag->pag_ici_root,
				(void **)&found_ip, *first_index, 1) :
		radix_tree_gang_lookup_tag(&pag->pag_ici_root,
				(void **)&found_ip, *first_index, 1, tag);
	if (count == 0)
		return NULL;

	/*
	 * Step the cursor past this inode.  If we have inodes in the last
	 * block of the AG and this is the last of them, the new index
	 * overflows into the next AG range and ends up smaller than the
	 * index of the inode we just found; treat that as end of walk.
	 */
	*first_index = XFS_INO_TO_AGINO(mp, found_ip->i_ino + 1);
	if (*first_index >= XFS_INO_TO_AGINO(mp, found_ip->i_ino))
		return found_ip;
	return NULL;
}
STATIC int STATIC int
xfs_inode_ag_walk( xfs_inode_ag_walk(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_perag *pag, struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip, int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags), struct xfs_perag *pag, int flags),
int flags, int flags)
int tag,
int exclusive,
int *nr_to_scan)
{ {
uint32_t first_index; uint32_t first_index;
int last_error = 0; int last_error = 0;
int skipped; int skipped;
int done;
restart: restart:
done = 0;
skipped = 0; skipped = 0;
first_index = 0; first_index = 0;
do { do {
int error = 0; int error = 0;
int nr_found;
xfs_inode_t *ip; xfs_inode_t *ip;
if (exclusive)
write_lock(&pag->pag_ici_lock);
else
read_lock(&pag->pag_ici_lock); read_lock(&pag->pag_ici_lock);
ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
if (!ip) { (void **)&ip, first_index, 1);
if (exclusive) if (!nr_found) {
write_unlock(&pag->pag_ici_lock);
else
read_unlock(&pag->pag_ici_lock); read_unlock(&pag->pag_ici_lock);
break; break;
} }
/*
* Update the index for the next lookup. Catch overflows
* into the next AG range which can occur if we have inodes
* in the last block of the AG and we are currently
* pointing to the last inode.
*/
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
/* execute releases pag->pag_ici_lock */ /* execute releases pag->pag_ici_lock */
error = execute(ip, pag, flags); error = execute(ip, pag, flags);
if (error == EAGAIN) { if (error == EAGAIN) {
...@@ -125,7 +93,7 @@ xfs_inode_ag_walk( ...@@ -125,7 +93,7 @@ xfs_inode_ag_walk(
if (error == EFSCORRUPTED) if (error == EFSCORRUPTED)
break; break;
} while ((*nr_to_scan)--); } while (!done);
if (skipped) { if (skipped) {
delay(1); delay(1);
...@@ -134,73 +102,29 @@ xfs_inode_ag_walk( ...@@ -134,73 +102,29 @@ xfs_inode_ag_walk(
return last_error; return last_error;
} }
/*
 * Pick the next per-AG structure to visit during an AG walk and return it
 * with a reference held, or NULL when there is nothing further to visit.
 *
 * For XFS_ICI_RECLAIM_TAG the walk is optimised: the perag tree is searched
 * by tag from *@first, so only AGs marked with that tag are returned, and
 * *@first is set to one past the AG that was found.  For any other tag the
 * perag for AG number *@first is fetched directly and *@first is bumped by
 * one.
 */
static struct xfs_perag *
xfs_inode_ag_iter_next_pag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		*first,
	int			tag)
{
	struct xfs_perag	*pag = NULL;
	int			nr;
	int			refcount;

	/* Untagged walks simply visit every AG in order. */
	if (tag != XFS_ICI_RECLAIM_TAG) {
		pag = xfs_perag_get(mp, *first);
		(*first)++;
		return pag;
	}

	rcu_read_lock();
	nr = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, *first, 1, tag);
	if (nr <= 0) {
		rcu_read_unlock();
		return NULL;
	}
	*first = pag->pag_agno + 1;

	/* open coded pag reference increment, taken before leaving RCU */
	refcount = atomic_inc_return(&pag->pag_ref);
	rcu_read_unlock();

	trace_xfs_perag_get_reclaim(mp, pag->pag_agno, refcount, _RET_IP_);
	return pag;
}
int int
xfs_inode_ag_iterator( xfs_inode_ag_iterator(
struct xfs_mount *mp, struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags), struct xfs_perag *pag, int flags),
int flags, int flags)
int tag,
int exclusive,
int *nr_to_scan)
{ {
struct xfs_perag *pag; struct xfs_perag *pag;
int error = 0; int error = 0;
int last_error = 0; int last_error = 0;
xfs_agnumber_t ag; xfs_agnumber_t ag;
int nr;
nr = nr_to_scan ? *nr_to_scan : INT_MAX;
ag = 0; ag = 0;
while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { while ((pag = xfs_perag_get(mp, ag))) {
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, ag = pag->pag_agno + 1;
exclusive, &nr); error = xfs_inode_ag_walk(mp, pag, execute, flags);
xfs_perag_put(pag); xfs_perag_put(pag);
if (error) { if (error) {
last_error = error; last_error = error;
if (error == EFSCORRUPTED) if (error == EFSCORRUPTED)
break; break;
} }
if (nr <= 0)
break;
} }
if (nr_to_scan)
*nr_to_scan = nr;
return XFS_ERROR(last_error); return XFS_ERROR(last_error);
} }
...@@ -318,8 +242,7 @@ xfs_sync_data( ...@@ -318,8 +242,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
XFS_ICI_NO_TAG, 0, NULL);
if (error) if (error)
return XFS_ERROR(error); return XFS_ERROR(error);
...@@ -337,8 +260,7 @@ xfs_sync_attr( ...@@ -337,8 +260,7 @@ xfs_sync_attr(
{ {
ASSERT((flags & ~SYNC_WAIT) == 0); ASSERT((flags & ~SYNC_WAIT) == 0);
return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
XFS_ICI_NO_TAG, 0, NULL);
} }
STATIC int STATIC int
...@@ -868,13 +790,72 @@ xfs_reclaim_inode( ...@@ -868,13 +790,72 @@ xfs_reclaim_inode(
} }
/*
 * Walk the AGs that are tagged as containing reclaimable inodes and reclaim
 * the inodes in them.
 *
 * Even if the filesystem is corrupted, we still want to try to reclaim all
 * the inodes.  If we don't, then a shutdown during the filesystem unmount
 * reclaim walk would leak all the unreclaimed inodes.  For that reason an
 * EFSCORRUPTED error does not terminate the walk; it is only remembered.
 *
 * @mp:		mount whose per-AG inode caches are walked
 * @flags:	passed through unchanged to xfs_reclaim_inode()
 * @nr_to_scan:	in/out scan budget.  It is decremented as inodes are
 *		processed and the walk stops once it is used up; callers that
 *		want an unbounded walk pass a very large value.  On return a
 *		value greater than zero means the walk ran out of reclaimable
 *		inodes before it ran out of budget.
 *
 * Returns the last error seen, wrapped in XFS_ERROR(), with EFSCORRUPTED
 * taking precedence over any later error; zero if no error occurred.
 */
int
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			flags,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	int			error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;

	ag = 0;
	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;

		/* resume the AG search after this one on the next pass */
		ag = pag->pag_agno + 1;

		do {
			struct xfs_inode *ip;
			int	nr_found;

			write_lock(&pag->pag_ici_lock);
			nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
						(void **)&ip, first_index, 1,
						XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				write_unlock(&pag->pag_ici_lock);
				break;
			}

			/*
			 * Update the index for the next lookup. Catch overflows
			 * into the next AG range which can occur if we have inodes
			 * in the last block of the AG and we are currently
			 * pointing to the last inode.
			 */
			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
				done = 1;

			/*
			 * pag_ici_lock is not dropped here on this path;
			 * xfs_reclaim_inode() is expected to release it.
			 */
			error = xfs_reclaim_inode(ip, pag, flags);
			if (error && last_error != EFSCORRUPTED)
				last_error = error;

		} while (!done && (*nr_to_scan)--);

		xfs_perag_put(pag);

		/*
		 * Stop walking AGs once the scan budget is used up.  Without
		 * this check the counter is left negative (i.e. non-zero)
		 * after the inner loop terminates, so every remaining tagged
		 * AG would be scanned without any limit.
		 */
		if (*nr_to_scan <= 0)
			break;
	}
	return XFS_ERROR(last_error);
}
int int
xfs_reclaim_inodes( xfs_reclaim_inodes(
xfs_mount_t *mp, xfs_mount_t *mp,
int mode) int mode)
{ {
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, int nr_to_scan = INT_MAX;
XFS_ICI_RECLAIM_TAG, 1, NULL);
return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
} }
/* /*
...@@ -896,17 +877,16 @@ xfs_reclaim_inode_shrink( ...@@ -896,17 +877,16 @@ xfs_reclaim_inode_shrink(
if (!(gfp_mask & __GFP_FS)) if (!(gfp_mask & __GFP_FS))
return -1; return -1;
xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan);
XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); /* terminate if we don't exhaust the scan */
/* if we don't exhaust the scan, don't bother coming back */
if (nr_to_scan > 0) if (nr_to_scan > 0)
return -1; return -1;
} }
reclaimable = 0; reclaimable = 0;
ag = 0; ag = 0;
while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
XFS_ICI_RECLAIM_TAG))) { ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable; reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag); xfs_perag_put(pag);
} }
......
...@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, ...@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp, int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock, int *nr_to_scan); int flags);
void xfs_inode_shrinker_register(struct xfs_mount *mp); void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp); void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
......
...@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ ...@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \ unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip)) TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get); DEFINE_PERAG_REF_EVENT(xfs_perag_get);
DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
......
...@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes( ...@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes(
uint flags) uint flags)
{ {
ASSERT(mp->m_quotainfo); ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
XFS_ICI_NO_TAG, 0, NULL);
} }
/*------------------------------------------------------------------------*/ /*------------------------------------------------------------------------*/
......
...@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) ...@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
return pag; return pag;
} }
/*
 * Search the mount's perag radix tree, starting at AG number @first, for the
 * next perag that has @tag set.
 *
 * The lookup and the reference count increment are both done inside an RCU
 * read-side critical section.  Returns the perag with its pag_ref raised by
 * one, or NULL if no perag at or after @first carries @tag.
 */
struct xfs_perag *
xfs_perag_get_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	int			tag)
{
	struct xfs_perag	*pag;
	int			nr;
	int			refcount;

	rcu_read_lock();
	nr = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, first, 1, tag);
	if (nr > 0) {
		/* take our reference before leaving the RCU section */
		refcount = atomic_inc_return(&pag->pag_ref);
		rcu_read_unlock();
		trace_xfs_perag_get_tag(mp, pag->pag_agno, refcount, _RET_IP_);
		return pag;
	}

	rcu_read_unlock();
	return NULL;
}
void void
xfs_perag_put(struct xfs_perag *pag) xfs_perag_put(struct xfs_perag *pag)
{ {
......
...@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) ...@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
* perag get/put wrappers for ref counting * perag get/put wrappers for ref counting
*/ */
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
int tag);
void xfs_perag_put(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag);
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册