提交 777df5af 编写于 作者: D Dave Chinner

xfs: Make inode reclaim states explicit

A.K.A.: don't rely on xfs_iflush() return value in reclaim

We have gradually been moving checks out of the reclaim code because
they are duplicated in xfs_iflush(). We've had a history of problems
in this area, and many of them stem from the overloading of the
return values from xfs_iflush() and interaction with inode flush
locking to determine if the inode is safe to reclaim.

With the desire to move to delayed write flushing of inodes and
non-blocking inode tree reclaim walks, the overloading of the
return value of xfs_iflush makes it very difficult to determine
the correct thing to do next.

This patch explicitly re-adds the checks to the inode reclaim code,
removing the reliance on the return value of xfs_iflush() to
determine what to do next. It also means that we can clearly
document all the inode states that reclaim must handle and hence
we can easily see that we handled all the necessary cases.

This also removes the need for the xfs_inode_clean() check in
xfs_iflush() as all callers now check this first (safely).
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
上级 d5db0f97
...@@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag( ...@@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag(
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
} }
/*
* Inodes in different states need to be treated differently, and the return
* value of xfs_iflush is not sufficient to get this right. The following table
* lists the inode states and the reclaim actions necessary for non-blocking
* reclaim:
*
*
* inode state iflush ret required action
* --------------- ---------- ---------------
* bad - reclaim
* shutdown EIO unpin and reclaim
* clean, unpinned 0 reclaim
* stale, unpinned 0 reclaim
* clean, pinned(*) 0 unpin and reclaim
* stale, pinned 0 unpin and reclaim
* dirty, async 0 block on flush lock, reclaim
* dirty, sync flush 0 block on flush lock, reclaim
*
* (*) dgc: I don't think the clean, pinned state is possible but it gets
* handled anyway given the order of checks implemented.
*
* Hence the order of actions after gaining the locks should be:
* bad => reclaim
* shutdown => unpin and reclaim
* pinned => unpin
* stale => reclaim
* clean => reclaim
* dirty => flush, wait and reclaim
*/
STATIC int STATIC int
xfs_reclaim_inode( xfs_reclaim_inode(
struct xfs_inode *ip, struct xfs_inode *ip,
struct xfs_perag *pag, struct xfs_perag *pag,
int sync_mode) int sync_mode)
{ {
int error;
/* /*
* The radix tree lock here protects a thread in xfs_iget from racing * The radix tree lock here protects a thread in xfs_iget from racing
* with us starting reclaim on the inode. Once we have the * with us starting reclaim on the inode. Once we have the
...@@ -729,30 +760,42 @@ xfs_reclaim_inode( ...@@ -729,30 +760,42 @@ xfs_reclaim_inode(
spin_unlock(&ip->i_flags_lock); spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock); write_unlock(&pag->pag_ici_lock);
/*
* If the inode is still dirty, then flush it out. If the inode
* is not in the AIL, then it will be OK to flush it delwri as
* long as xfs_iflush() does not keep any references to the inode.
* We leave that decision up to xfs_iflush() since it has the
* knowledge of whether it's OK to simply do a delwri flush of
* the inode or whether we need to wait until the inode is
* pulled from the AIL.
* We get the flush lock regardless, though, just to make sure
* we don't free it while it is being flushed.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_iflock(ip); xfs_iflock(ip);
/* if (is_bad_inode(VFS_I(ip)))
* In the case of a forced shutdown we rely on xfs_iflush() to goto reclaim;
* wait for the inode to be unpinned before returning an error. if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
*/ xfs_iunpin_wait(ip);
if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { goto reclaim;
/* synchronize with xfs_iflush_done */ }
xfs_iflock(ip); if (xfs_ipincount(ip))
xfs_ifunlock(ip); xfs_iunpin_wait(ip);
if (xfs_iflags_test(ip, XFS_ISTALE))
goto reclaim;
if (xfs_inode_clean(ip))
goto reclaim;
/* Now we have an inode that needs flushing */
error = xfs_iflush(ip, sync_mode);
if (!error) {
switch(sync_mode) {
case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
case XFS_IFLUSH_DELWRI:
case XFS_IFLUSH_ASYNC:
case XFS_IFLUSH_DELWRI_ELSE_SYNC:
case XFS_IFLUSH_SYNC:
/* IO issued, synchronise with IO completion */
xfs_iflock(ip);
break;
default:
ASSERT(0);
break;
}
} }
reclaim:
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_ireclaim(ip); xfs_ireclaim(ip);
return 0; return 0;
......
...@@ -2493,7 +2493,7 @@ __xfs_iunpin_wait( ...@@ -2493,7 +2493,7 @@ __xfs_iunpin_wait(
wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
} }
static inline void void
xfs_iunpin_wait( xfs_iunpin_wait(
xfs_inode_t *ip) xfs_inode_t *ip)
{ {
...@@ -2848,15 +2848,6 @@ xfs_iflush( ...@@ -2848,15 +2848,6 @@ xfs_iflush(
iip = ip->i_itemp; iip = ip->i_itemp;
mp = ip->i_mount; mp = ip->i_mount;
/*
* If the inode isn't dirty, then just release the inode flush lock and
* do nothing.
*/
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
return 0;
}
/* /*
* We can't flush the inode until it is unpinned, so wait for it if we * We can't flush the inode until it is unpinned, so wait for it if we
* are allowed to block. We know noone new can pin it, because we are * are allowed to block. We know noone new can pin it, because we are
......
...@@ -483,6 +483,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); ...@@ -483,6 +483,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_ipin(xfs_inode_t *); void xfs_ipin(xfs_inode_t *);
void xfs_iunpin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint); int xfs_iflush(xfs_inode_t *, uint);
void xfs_ichgtime(xfs_inode_t *, int); void xfs_ichgtime(xfs_inode_t *, int);
void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_inodes(xfs_inode_t **, int, uint);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册