提交 e4143a1c 编写于 作者: D David Chinner 提交者: Lachlan McIlroy

[XFS] Fix transaction overrun during writeback.

Prevent transaction overrun in xfs_iomap_write_allocate() if we race with
a truncate that overlaps the delalloc range we were planning to allocate.

If we race, we may allocate into a hole and that requires block
allocation. At this point in time we don't have a reservation for block
allocation (apart from metadata blocks) and so allocating into a hole
rather than a delalloc region results in overflowing the transaction block
reservation.

Fix it by only allowing a single extent to be allocated at a time.

SGI-PV: 972757
SGI-Modid: xfs-linux-melb:xfs-kern:30005a
Signed-off-by: NDavid Chinner <dgc@sgi.com>
Signed-off-by: NLachlan McIlroy <lachlan@sgi.com>
上级 786f486f
...@@ -702,6 +702,9 @@ xfs_iomap_write_delay( ...@@ -702,6 +702,9 @@ xfs_iomap_write_delay(
* the originating callers request. * the originating callers request.
* *
* Called without a lock on the inode. * Called without a lock on the inode.
*
* We no longer bother to look at the incoming map - all we have to
* guarantee is that whatever we allocate fills the required range.
*/ */
int int
xfs_iomap_write_allocate( xfs_iomap_write_allocate(
...@@ -717,9 +720,9 @@ xfs_iomap_write_allocate( ...@@ -717,9 +720,9 @@ xfs_iomap_write_allocate(
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
xfs_bmap_free_t free_list; xfs_bmap_free_t free_list;
xfs_filblks_t count_fsb; xfs_filblks_t count_fsb;
xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; xfs_bmbt_irec_t imap;
xfs_trans_t *tp; xfs_trans_t *tp;
int i, nimaps, committed; int nimaps, committed;
int error = 0; int error = 0;
int nres; int nres;
...@@ -766,13 +769,38 @@ xfs_iomap_write_allocate( ...@@ -766,13 +769,38 @@ xfs_iomap_write_allocate(
XFS_BMAP_INIT(&free_list, &first_block); XFS_BMAP_INIT(&free_list, &first_block);
nimaps = XFS_STRAT_WRITE_IMAPS;
/* /*
* Ensure we don't go beyond eof - it is possible * it is possible that the extents have changed since
* the extents changed since we did the read call, * we did the read call as we dropped the ilock for a
* we dropped the ilock in the interim. * while. We have to be careful about truncates or hole
* punchs here - we are not allowed to allocate
* non-delalloc blocks here.
*
* The only protection against truncation is the pages
* for the range we are being asked to convert are
* locked and hence a truncate will block on them
* first.
*
* As a result, if we go beyond the range we really
* need and hit an delalloc extent boundary followed by
* a hole while we have excess blocks in the map, we
* will fill the hole incorrectly and overrun the
* transaction reservation.
*
* Using a single map prevents this as we are forced to
* check each map we look for overlap with the desired
* range and abort as soon as we find it. Also, given
* that we only return a single map, having one beyond
* what we can return is probably a bit silly.
*
* We also need to check that we don't go beyond EOF;
* this is a truncate optimisation as a truncate sets
* the new file size before block on the pages we
* currently have locked under writeback. Because they
* are about to be tossed, we don't need to write them
* back....
*/ */
nimaps = 1;
end_fsb = XFS_B_TO_FSB(mp, ip->i_size); end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
xfs_bmap_last_offset(NULL, ip, &last_block, xfs_bmap_last_offset(NULL, ip, &last_block,
XFS_DATA_FORK); XFS_DATA_FORK);
...@@ -788,7 +816,7 @@ xfs_iomap_write_allocate( ...@@ -788,7 +816,7 @@ xfs_iomap_write_allocate(
/* Go get the actual blocks */ /* Go get the actual blocks */
error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
XFS_BMAPI_WRITE, &first_block, 1, XFS_BMAPI_WRITE, &first_block, 1,
imap, &nimaps, &free_list, NULL); &imap, &nimaps, &free_list, NULL);
if (error) if (error)
goto trans_cancel; goto trans_cancel;
...@@ -807,27 +835,24 @@ xfs_iomap_write_allocate( ...@@ -807,27 +835,24 @@ xfs_iomap_write_allocate(
* See if we were able to allocate an extent that * See if we were able to allocate an extent that
* covers at least part of the callers request * covers at least part of the callers request
*/ */
for (i = 0; i < nimaps; i++) { if (unlikely(!imap.br_startblock &&
if (unlikely(!imap[i].br_startblock && XFS_IS_REALTIME_INODE(ip)))
!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) return xfs_cmn_err_fsblock_zero(ip, &imap);
return xfs_cmn_err_fsblock_zero(ip, &imap[i]); if ((offset_fsb >= imap.br_startoff) &&
if ((offset_fsb >= imap[i].br_startoff) && (offset_fsb < (imap.br_startoff +
(offset_fsb < (imap[i].br_startoff + imap.br_blockcount))) {
imap[i].br_blockcount))) { *map = imap;
*map = imap[i]; *retmap = 1;
*retmap = 1; XFS_STATS_INC(xs_xstrat_quick);
XFS_STATS_INC(xs_xstrat_quick); return 0;
return 0;
}
count_fsb -= imap[i].br_blockcount;
} }
/* So far we have not mapped the requested part of the /*
* So far we have not mapped the requested part of the
* file, just surrounding data, try again. * file, just surrounding data, try again.
*/ */
nimaps--; count_fsb -= imap.br_blockcount;
map_start_fsb = imap[nimaps].br_startoff + map_start_fsb = imap.br_startoff + imap.br_blockcount;
imap[nimaps].br_blockcount;
} }
trans_cancel: trans_cancel:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册