提交 585e6d88 编写于 作者: D David Chinner 提交者: Tim Shimmin

[XFS] Fix a synchronous buftarg flush deadlock when freezing.

At the last stage of a freeze, we flush the buftarg synchronously over and
over again until it succeeds twice without skipping any buffers.

The delwri list flush skips pinned buffers, but tries to flush all others.
It removes the buffers from the delwri list, then tries to lock them one
at a time as it traverses the list to issue the I/O. It holds them locked
until we issue all of the I/O and then unlocks them once we've waited for
it to complete.

The problem is that during a freeze, the filesystem may still be doing
stuff - like flushing delalloc data buffers - in the background and hence
we can be trying to lock buffers that were on the delwri list at the same
time. Hence we can get ABBA deadlocks between threads doing allocation and
the buftarg flush (freeze) thread.

Fix it by skipping locked (and pinned) buffers as we traverse the delwri
buffer list.

SGI-PV: 957195
SGI-Modid: xfs-linux-melb:xfs-kern:27535a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
上级 dac61f52
...@@ -1679,21 +1679,59 @@ xfsbufd_wakeup( ...@@ -1679,21 +1679,59 @@ xfsbufd_wakeup(
return 0; return 0;
} }
/*
 * Pull flushable buffers off the target's delayed write queue and onto
 * the caller-supplied list, reporting how many buffers were passed over.
 *
 * @target: buffer target whose bt_delwrite_queue is walked, under
 *          bt_delwrite_lock.
 * @list:   list head to fill; it is (re)initialised here, so any previous
 *          contents are not preserved.
 * @age:    minimum time, in jiffies, a buffer must have been queued before
 *          it is taken. Ignored when XBT_FORCE_FLUSH is set in @flags.
 * @flags:  XBT_FORCE_FLUSH to take buffers regardless of their age.
 *
 * A buffer is passed over when it is pinned or when the conditional lock
 * attempt does not succeed; never blocking on a buffer lock here is what
 * prevents deadlocks against other threads holding those buffers.
 *
 * Every buffer moved to @list is left locked, has its delayed-write flags
 * cleared and XBF_WRITE set.
 *
 * Returns the number of buffers that were passed over.
 */
STATIC int
xfs_buf_delwri_split(
	xfs_buftarg_t	*target,
	struct list_head *list,
	unsigned long	age,
	int		flags)
{
	struct list_head *queue = &target->bt_delwrite_queue;
	spinlock_t	*lock = &target->bt_delwrite_lock;
	xfs_buf_t	*bp, *next;
	int		force = flags & XBT_FORCE_FLUSH;
	int		nskipped = 0;

	INIT_LIST_HEAD(list);

	spin_lock(lock);
	list_for_each_entry_safe(bp, next, queue, b_list) {
		XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
		ASSERT(bp->b_flags & XBF_DELWRI);

		/* Pinned, or locked by someone else: leave it on the queue. */
		if (xfs_buf_ispin(bp) || xfs_buf_cond_lock(bp)) {
			nskipped++;
			continue;
		}

		/*
		 * Buffer is too young to write and we are not forcing: drop
		 * the lock we just took and stop walking the queue.
		 * NOTE(review): stopping here presumably relies on the queue
		 * being ordered by queue time - confirm against the enqueue
		 * path.
		 */
		if (!force && time_before(jiffies, bp->b_queuetime + age)) {
			xfs_buf_unlock(bp);
			break;
		}

		/* Convert from delayed-write to write and hand it over. */
		bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q | _XBF_RUN_QUEUES);
		bp->b_flags |= XBF_WRITE;
		list_move_tail(&bp->b_list, list);
	}
	spin_unlock(lock);

	return nskipped;
}
STATIC int STATIC int
xfsbufd( xfsbufd(
void *data) void *data)
{ {
struct list_head tmp; struct list_head tmp;
unsigned long age; xfs_buftarg_t *target = (xfs_buftarg_t *)data;
xfs_buftarg_t *target = (xfs_buftarg_t *)data; int count;
xfs_buf_t *bp, *n; xfs_buf_t *bp;
struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock;
int count;
current->flags |= PF_MEMALLOC; current->flags |= PF_MEMALLOC;
INIT_LIST_HEAD(&tmp);
do { do {
if (unlikely(freezing(current))) { if (unlikely(freezing(current))) {
set_bit(XBT_FORCE_SLEEP, &target->bt_flags); set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
...@@ -1705,37 +1743,19 @@ xfsbufd( ...@@ -1705,37 +1743,19 @@ xfsbufd(
schedule_timeout_interruptible( schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10)); xfs_buf_timer_centisecs * msecs_to_jiffies(10));
count = 0; xfs_buf_delwri_split(target, &tmp,
age = xfs_buf_age_centisecs * msecs_to_jiffies(10); xfs_buf_age_centisecs * msecs_to_jiffies(10),
spin_lock(dwlk); test_bit(XBT_FORCE_FLUSH, &target->bt_flags)
list_for_each_entry_safe(bp, n, dwq, b_list) { ? XBT_FORCE_FLUSH : 0);
XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
ASSERT(bp->b_flags & XBF_DELWRI);
if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
if (!test_bit(XBT_FORCE_FLUSH,
&target->bt_flags) &&
time_before(jiffies,
bp->b_queuetime + age)) {
xfs_buf_unlock(bp);
break;
}
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, &tmp);
count++;
}
}
spin_unlock(dwlk);
count = 0;
while (!list_empty(&tmp)) { while (!list_empty(&tmp)) {
bp = list_entry(tmp.next, xfs_buf_t, b_list); bp = list_entry(tmp.next, xfs_buf_t, b_list);
ASSERT(target == bp->b_target); ASSERT(target == bp->b_target);
list_del_init(&bp->b_list); list_del_init(&bp->b_list);
xfs_buf_iostrategy(bp); xfs_buf_iostrategy(bp);
count++;
} }
if (as_list_len > 0) if (as_list_len > 0)
...@@ -1756,40 +1776,23 @@ xfsbufd( ...@@ -1756,40 +1776,23 @@ xfsbufd(
*/ */
int int
xfs_flush_buftarg( xfs_flush_buftarg(
xfs_buftarg_t *target, xfs_buftarg_t *target,
int wait) int wait)
{ {
struct list_head tmp; struct list_head tmp;
xfs_buf_t *bp, *n; xfs_buf_t *bp, *n;
int pincount = 0; int pincount = 0;
struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock;
xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue); xfs_buf_runall_queues(xfslogd_workqueue);
INIT_LIST_HEAD(&tmp); pincount = xfs_buf_delwri_split(target, &tmp, 0, XBT_FORCE_FLUSH);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
ASSERT(bp->b_target == target);
ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
if (xfs_buf_ispin(bp)) {
pincount++;
continue;
}
list_move_tail(&bp->b_list, &tmp);
}
spin_unlock(dwlk);
/* /*
* Dropped the delayed write list lock, now walk the temporary list * Dropped the delayed write list lock, now walk the temporary list
*/ */
list_for_each_entry_safe(bp, n, &tmp, b_list) { list_for_each_entry_safe(bp, n, &tmp, b_list) {
xfs_buf_lock(bp); ASSERT(target == bp->b_target);
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
if (wait) if (wait)
bp->b_flags &= ~XBF_ASYNC; bp->b_flags &= ~XBF_ASYNC;
else else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册