提交 009c6e87 编写于 作者: B Brian Foster 提交者: Dave Chinner

xfs: add missing ilock around dio write last extent alignment

The iomap codepath (via get_blocks()) acquires and releases the inode
lock in the case of a direct write that requires block allocation. This
is because xfs_iomap_write_direct() allocates a transaction, which means
the ilock must be dropped and reacquired after the transaction is
allocated and reserved.

xfs_iomap_write_direct() invokes xfs_iomap_eof_align_last_fsb() before
the transaction is created and thus before the ilock is reacquired. This
can lead to calls to xfs_iread_extents() and reads of the in-core extent
list without any synchronization (via xfs_bmap_eof() and
xfs_bmap_last_extent()). xfs_iread_extents() assert fails if the ilock
is not held, but this is not currently seen in practice as the current
callers had already invoked xfs_bmapi_read().

What has been seen in practice are reports of crashes down in the
xfs_bmap_eof() codepath on direct writes due to seemingly bogus pointer
references from xfs_iext_get_ext(). While an explicit reproducer is not
currently available to confirm the cause of the problem, crash analysis
and code inspection from David Jeffery had identified the insufficient
locking.

xfs_iomap_eof_align_last_fsb() is called from other contexts with the
inode lock already held, so we cannot acquire it therein.
__xfs_get_blocks() acquires and drops the ilock with variable flags to
cover the event that the extent list must be read in. The common case is
that __xfs_get_blocks() acquires the shared ilock. To provide locking
around the last extent alignment call without adding more lock cycles to
the dio path, update xfs_iomap_write_direct() to expect the shared ilock
held on entry and do the extent alignment under its protection. Demote
the lock, if necessary, from __xfs_get_blocks() and push the
xfs_qm_dqattach() call outside of the shared lock critical section.
Also, add an assert to document that the extent list is always expected
to be present in this path. Otherwise, we risk a call to
xfs_iread_extents() while under the shared ilock. This is safe as all
current callers have executed an xfs_bmapi_read() call under the current
iolock context.
Reported-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
上级 5cb13dcd
...@@ -1408,12 +1408,12 @@ __xfs_get_blocks( ...@@ -1408,12 +1408,12 @@ __xfs_get_blocks(
imap.br_startblock == DELAYSTARTBLOCK))) { imap.br_startblock == DELAYSTARTBLOCK))) {
if (direct || xfs_get_extsz_hint(ip)) { if (direct || xfs_get_extsz_hint(ip)) {
/* /*
* Drop the ilock in preparation for starting the block * xfs_iomap_write_direct() expects the shared lock. It
* allocation transaction. It will be retaken * is unlocked on return.
* exclusively inside xfs_iomap_write_direct for the
* actual allocation.
*/ */
xfs_iunlock(ip, lockmode); if (lockmode == XFS_ILOCK_EXCL)
xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, size, error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps); &imap, nimaps);
if (error) if (error)
......
...@@ -131,20 +131,29 @@ xfs_iomap_write_direct( ...@@ -131,20 +131,29 @@ xfs_iomap_write_direct(
uint qblocks, resblks, resrtextents; uint qblocks, resblks, resrtextents;
int committed; int committed;
int error; int error;
int lockmode;
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
rt = XFS_IS_REALTIME_INODE(ip); rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip); extsz = xfs_get_extsz_hint(ip);
lockmode = XFS_ILOCK_SHARED; /* locked by caller */
ASSERT(xfs_isilocked(ip, lockmode));
offset_fsb = XFS_B_TO_FSBT(mp, offset); offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > XFS_ISIZE(ip)) { if ((offset + count) > XFS_ISIZE(ip)) {
/*
* Assert that the in-core extent list is present since this can
* call xfs_iread_extents() and we only have the ilock shared.
* This should be safe because the lock was held around a bmapi
* call in the caller and we only need it to access the in-core
* list.
*/
ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
XFS_IFEXTENTS);
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error) if (error)
return error; goto out_unlock;
} else { } else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t) last_fsb = MIN(last_fsb, (xfs_fileoff_t)
...@@ -173,6 +182,15 @@ xfs_iomap_write_direct( ...@@ -173,6 +182,15 @@ xfs_iomap_write_direct(
quota_flag = XFS_QMOPT_RES_REGBLKS; quota_flag = XFS_QMOPT_RES_REGBLKS;
} }
/*
* Drop the shared lock acquired by the caller, attach the dquot if
* necessary and move on to transaction setup.
*/
xfs_iunlock(ip, lockmode);
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
/* /*
* Allocate and setup the transaction * Allocate and setup the transaction
*/ */
...@@ -187,7 +205,8 @@ xfs_iomap_write_direct( ...@@ -187,7 +205,8 @@ xfs_iomap_write_direct(
return error; return error;
} }
xfs_ilock(ip, XFS_ILOCK_EXCL); lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockmode);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error) if (error)
...@@ -229,7 +248,7 @@ xfs_iomap_write_direct( ...@@ -229,7 +248,7 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap); error = xfs_alert_fsblock_zero(ip, imap);
out_unlock: out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, lockmode);
return error; return error;
out_bmap_cancel: out_bmap_cancel:
......
...@@ -181,6 +181,11 @@ xfs_fs_map_blocks( ...@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
ASSERT(imap.br_startblock != DELAYSTARTBLOCK); ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
/*
* xfs_iomap_write_direct() expects to take ownership of
* the shared ilock.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
error = xfs_iomap_write_direct(ip, offset, length, error = xfs_iomap_write_direct(ip, offset, length,
&imap, nimaps); &imap, nimaps);
if (error) if (error)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册