Commit f5749432 authored by Linus Torvalds

Merge tag 'xfs-4.18-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Here are some patches for 4.18 to fix regressions, accounting
  problems, overflow problems, and to strengthen metadata validation to
  prevent corruption.

  This series has been run through a full xfstests run over the weekend
  and through a quick xfstests run against this morning's master, with
  no major failures reported.

  Changes since last update:

   - more metadata validation strengthening to prevent crashes.

   - fix extent offset overflow problem when insert_range on a 512b
     block fs

   - fix some off-by-one errors in the realtime fsmap code

   - fix some math errors in the default resblks calculation when free
     space is low

   - fix a problem where stale page contents are exposed via mmap read
     after a zero_range at eof

   - fix accounting problems with per-ag reservations causing statfs
     reports to vary incorrectly"

* tag 'xfs-4.18-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix fdblocks accounting w/ RMAPBT per-AG reservation
  xfs: ensure post-EOF zeroing happens after zeroing part of a file
  xfs: fix off-by-one error in xfs_rtalloc_query_range
  xfs: fix uninitialized field in rtbitmap fsmap backend
  xfs: recheck reflink state after grabbing ILOCK_SHARED for a write
  xfs: don't allow insert-range to shift extents past the maximum offset
  xfs: don't trip over negative free space in xfs_reserve_blocks
  xfs: allow empty transactions while frozen
  xfs: xfs_iflush_abort() can be called twice on cluster writeback failure
  xfs: More robust inode extent count validation
  xfs: simplify xfs_bmap_punch_delalloc_range
@@ -157,6 +157,7 @@ __xfs_ag_resv_free(
 	error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
 	resv->ar_reserved = 0;
 	resv->ar_asked = 0;
+	resv->ar_orig_reserved = 0;
 
 	if (error)
 		trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
@@ -189,13 +190,34 @@ __xfs_ag_resv_init(
 	struct xfs_mount	*mp = pag->pag_mount;
 	struct xfs_ag_resv	*resv;
 	int			error;
-	xfs_extlen_t		reserved;
+	xfs_extlen_t		hidden_space;
 
 	if (used > ask)
 		ask = used;
-	reserved = ask - used;
 
-	error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+	switch (type) {
+	case XFS_AG_RESV_RMAPBT:
+		/*
+		 * Space taken by the rmapbt is not subtracted from fdblocks
+		 * because the rmapbt lives in the free space. Here we must
+		 * subtract the entire reservation from fdblocks so that we
+		 * always have blocks available for rmapbt expansion.
+		 */
+		hidden_space = ask;
+		break;
+	case XFS_AG_RESV_METADATA:
+		/*
+		 * Space taken by all other metadata btrees are accounted
+		 * on-disk as used space. We therefore only hide the space
+		 * that is reserved but not used by the trees.
+		 */
+		hidden_space = ask - used;
+		break;
+	default:
+		ASSERT(0);
+		return -EINVAL;
+	}
+	error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
 	if (error) {
 		trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
 				error, _RET_IP_);
@@ -216,7 +238,8 @@ __xfs_ag_resv_init(
 	resv = xfs_perag_resv(pag, type);
 	resv->ar_asked = ask;
-	resv->ar_reserved = resv->ar_orig_reserved = reserved;
+	resv->ar_orig_reserved = hidden_space;
+	resv->ar_reserved = ask - used;
 
 	trace_xfs_ag_resv_init(pag, type, ask);
 	return 0;
......
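A minimal standalone sketch of the accounting rule in the hunk above (plain userspace C rather than kernel code; the enum, function name, and block counts are made up for illustration): an RMAPBT reservation hides its entire ask from the free-space counter because the rmapbt blocks live in the free space itself, while a METADATA reservation only hides the part that is reserved but not yet used, since used metadata blocks are already accounted as allocated on disk.

#include <stdint.h>
#include <stdio.h>

enum resv_type { RESV_METADATA, RESV_RMAPBT };

/* How many blocks the reservation hides from the free-space counter. */
static uint64_t hidden_space(enum resv_type type, uint64_t ask, uint64_t used)
{
	if (used > ask)
		ask = used;

	switch (type) {
	case RESV_RMAPBT:
		return ask;		/* hide the whole reservation */
	case RESV_METADATA:
		return ask - used;	/* hide only the unused part */
	}
	return 0;
}

int main(void)
{
	/* hypothetical AG: ask for 100 blocks, 30 already used by the btree */
	printf("rmapbt   hides %llu blocks\n",
	       (unsigned long long)hidden_space(RESV_RMAPBT, 100, 30));   /* 100 */
	printf("metadata hides %llu blocks\n",
	       (unsigned long long)hidden_space(RESV_METADATA, 100, 30)); /* 70 */
	return 0;
}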
@@ -5780,6 +5780,32 @@ xfs_bmap_collapse_extents(
 	return error;
 }
 
+/* Make sure we won't be right-shifting an extent past the maximum bound. */
+int
+xfs_bmap_can_insert_extents(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		off,
+	xfs_fileoff_t		shift)
+{
+	struct xfs_bmbt_irec	got;
+	int			is_empty;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
+	if (!error && !is_empty && got.br_startoff >= off &&
+	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
+		error = -EINVAL;
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	return error;
+}
+
 int
 xfs_bmap_insert_extents(
 	struct xfs_trans	*tp,
......
@@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
 		bool *done, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops);
+int	xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
+		xfs_fileoff_t shift);
 int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
 		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
......
@@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt {
 		XFS_DFORK_DSIZE(dip, mp) : \
 		XFS_DFORK_ASIZE(dip, mp))
 
+#define XFS_DFORK_MAXEXT(dip, mp, w) \
+		(XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
+
 /*
  * Return pointers to the data or attribute forks.
  */
@@ -1526,6 +1529,8 @@ typedef struct xfs_bmdr_block {
 #define BMBT_STARTBLOCK_BITLEN	52
 #define BMBT_BLOCKCOUNT_BITLEN	21
 
+#define BMBT_STARTOFF_MASK	((1ULL << BMBT_STARTOFF_BITLEN) - 1)
+
 typedef struct xfs_bmbt_rec {
 	__be64			l0, l1;
 } xfs_bmbt_rec_t;
......
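A standalone sketch of the wrap-around test used by xfs_bmap_can_insert_extents() above (userspace C; BMBT_STARTOFF_BITLEN is the 54-bit width of the on-disk startoff field, and the example offsets are invented): after adding the shift, the result is masked back to the width of the field, so a shift that would overflow the maximum file offset produces a masked value smaller than the original offset.

#include <stdint.h>
#include <stdio.h>

#define BMBT_STARTOFF_BITLEN	54
#define BMBT_STARTOFF_MASK	((1ULL << BMBT_STARTOFF_BITLEN) - 1)

/* Would right-shifting an extent at startoff by 'shift' blocks wrap? */
static int shift_would_wrap(uint64_t startoff, uint64_t shift)
{
	return ((startoff + shift) & BMBT_STARTOFF_MASK) < startoff;
}

int main(void)
{
	uint64_t last = BMBT_STARTOFF_MASK - 8;	/* extent close to the limit */

	printf("shift by 4 blocks:  wrap = %d\n", shift_would_wrap(last, 4));	/* 0 */
	printf("shift by 64 blocks: wrap = %d\n", shift_would_wrap(last, 64));	/* 1 */
	return 0;
}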
@@ -374,6 +374,47 @@ xfs_log_dinode_to_disk(
 	}
 }
 
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+	struct xfs_dinode	*dip,
+	struct xfs_mount	*mp,
+	int			whichfork)
+{
+	uint32_t		di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		/*
+		 * no local regular files yet
+		 */
+		if (whichfork == XFS_DATA_FORK) {
+			if (S_ISREG(be16_to_cpu(dip->di_mode)))
+				return __this_address;
+			if (be64_to_cpu(dip->di_size) >
+					XFS_DFORK_SIZE(dip, mp, whichfork))
+				return __this_address;
+		}
+		if (di_nextents)
+			return __this_address;
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+			return __this_address;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (whichfork == XFS_ATTR_FORK) {
+			if (di_nextents > MAXAEXTNUM)
+				return __this_address;
+		} else if (di_nextents > MAXEXTNUM) {
+			return __this_address;
+		}
+		break;
+	default:
+		return __this_address;
+	}
+	return NULL;
+}
+
 xfs_failaddr_t
 xfs_dinode_verify(
 	struct xfs_mount	*mp,
@@ -441,24 +482,9 @@ xfs_dinode_verify(
 	case S_IFREG:
 	case S_IFLNK:
 	case S_IFDIR:
-		switch (dip->di_format) {
-		case XFS_DINODE_FMT_LOCAL:
-			/*
-			 * no local regular files yet
-			 */
-			if (S_ISREG(mode))
-				return __this_address;
-			if (di_size > XFS_DFORK_DSIZE(dip, mp))
-				return __this_address;
-			if (dip->di_nextents)
-				return __this_address;
-			/* fall through */
-		case XFS_DINODE_FMT_EXTENTS:
-		case XFS_DINODE_FMT_BTREE:
-			break;
-		default:
-			return __this_address;
-		}
+		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+		if (fa)
+			return fa;
 		break;
 	case 0:
 		/* Uninitialized inode ok. */
@@ -468,17 +494,9 @@ xfs_dinode_verify(
 	}
 
 	if (XFS_DFORK_Q(dip)) {
-		switch (dip->di_aformat) {
-		case XFS_DINODE_FMT_LOCAL:
-			if (dip->di_anextents)
-				return __this_address;
-			/* fall through */
-		case XFS_DINODE_FMT_EXTENTS:
-		case XFS_DINODE_FMT_BTREE:
-			break;
-		default:
-			return __this_address;
-		}
+		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+		if (fa)
+			return fa;
 	} else {
 		/*
 		 * If there is no fork offset, this may be a freshly-made inode
......
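A standalone sketch of the XFS_DFORK_MAXEXT bound that xfs_dinode_verify_fork() relies on above (userspace C; the fork size and extent count are invented example numbers): an extents-format fork stores one 16-byte bmbt record per extent, so an on-disk extent count larger than fork size divided by record size cannot possibly be valid and marks the inode as corrupt.

#include <stdint.h>
#include <stdio.h>

#define BMBT_REC_SIZE	16U	/* sizeof(struct xfs_bmbt_rec): two __be64 */

/* Maximum number of extent records that fit in a fork of the given size. */
static uint32_t dfork_maxext(uint32_t fork_size)
{
	return fork_size / BMBT_REC_SIZE;
}

int main(void)
{
	uint32_t fork_size = 368;	/* hypothetical data fork size in bytes */
	uint32_t di_nextents = 25;	/* extent count claimed by the inode */

	printf("records that fit: %u\n", dfork_maxext(fork_size));	/* 23 */
	printf("corrupt: %s\n",
	       di_nextents > dfork_maxext(fork_size) ? "yes" : "no");	/* yes */
	return 0;
}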
@@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range(
 	if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
 	    low_rec->ar_startext == high_rec->ar_startext)
 		return 0;
-	if (high_rec->ar_startext >= mp->m_sb.sb_rextents)
-		high_rec->ar_startext = mp->m_sb.sb_rextents - 1;
+	if (high_rec->ar_startext > mp->m_sb.sb_rextents)
+		high_rec->ar_startext = mp->m_sb.sb_rextents;
 
 	/* Iterate the bitmap, looking for discrepancies. */
 	rtstart = low_rec->ar_startext;
......
@@ -685,12 +685,10 @@ xfs_getbmap(
 }
 
 /*
- * dead simple method of punching delalyed allocation blocks from a range in
- * the inode. Walks a block at a time so will be slow, but is only executed in
- * rare error cases so the overhead is not critical. This will always punch out
- * both the start and end blocks, even if the ranges only partially overlap
- * them, so it is up to the caller to ensure that partial blocks are not
- * passed in.
+ * Dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. This will always punch out both the start and end blocks, even
+ * if the ranges only partially overlap them, so it is up to the caller to
+ * ensure that partial blocks are not passed in.
  */
 int
 xfs_bmap_punch_delalloc_range(
@@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range(
 	xfs_fileoff_t		start_fsb,
 	xfs_fileoff_t		length)
 {
-	xfs_fileoff_t		remaining = length;
+	struct xfs_ifork	*ifp = &ip->i_df;
+	xfs_fileoff_t		end_fsb = start_fsb + length;
+	struct xfs_bmbt_irec	got, del;
+	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-	do {
-		int		done;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
-		xfs_fsblock_t	firstblock;
-		struct xfs_defer_ops dfops;
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
 
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
-				       XFS_BMAPI_ENTIRE);
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+		return 0;
 
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"Failed delalloc mapping lookup ino %lld fsb %lld.",
-						ip->i_ino, start_fsb);
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_block;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_block;
-		}
-		WARN_ON(imap.br_blockcount == 0);
+	while (got.br_startoff + got.br_blockcount > start_fsb) {
+		del = got;
+		xfs_trim_extent(&del, start_fsb, length);
 
 		/*
-		 * Note: while we initialise the firstblock/dfops pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 * A delete can push the cursor forward. Step back to the
+		 * previous extent on non-delalloc or extents outside the
+		 * target range.
 		 */
-		xfs_defer_init(&dfops, &firstblock);
-		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
-					&dfops, &done);
-		if (error)
+		if (!del.br_blockcount ||
+		    !isnullstartblock(del.br_startblock)) {
+			if (!xfs_iext_prev_extent(ifp, &icur, &got))
+				break;
+			continue;
+		}
+
+		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
+				&got, &del);
+		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
 			break;
-
-		ASSERT(!xfs_defer_has_unfinished_work(&dfops));
-next_block:
-		start_fsb++;
-		remaining--;
-	} while(remaining > 0);
+	}
 
 	return error;
 }
@@ -1208,7 +1187,22 @@ xfs_free_file_space(
 		return 0;
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-	return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+	error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+	if (error)
+		return error;
+
+	/*
+	 * If we zeroed right up to EOF and EOF straddles a page boundary we
+	 * must make sure that the post-EOF area is also zeroed because the
+	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
+	 * Writeback of the eof page will do this, albeit clumsily.
+	 */
+	if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+				(offset + len) & ~PAGE_MASK, LLONG_MAX);
+	}
+
+	return error;
 }
 
 /*
@@ -1404,6 +1398,10 @@ xfs_insert_file_space(
 
 	trace_xfs_insert_file_space(ip);
 
+	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
+	if (error)
+		return error;
+
 	error = xfs_prepare_shift(ip, offset);
 	if (error)
 		return error;
......
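A standalone sketch of the extent trimming step that the rewritten xfs_bmap_punch_delalloc_range() applies to each extent it visits (userspace C with simplified types, not the kernel's xfs_trim_extent; the example offsets are invented): the extent is clipped to the punch range, and an extent with no overlap ends up with a zero block count, which is exactly the case the new loop skips by stepping back to the previous extent.

#include <stdint.h>
#include <stdio.h>

struct irec {
	uint64_t	startoff;	/* first block of the extent */
	uint64_t	blockcount;	/* length of the extent in blocks */
};

/* Clip an extent to [bno, bno + len); no overlap leaves blockcount == 0. */
static void trim_extent(struct irec *irec, uint64_t bno, uint64_t len)
{
	uint64_t end = bno + len;

	if (irec->startoff + irec->blockcount <= bno || irec->startoff >= end) {
		irec->blockcount = 0;
		return;
	}
	if (irec->startoff < bno) {
		irec->blockcount -= bno - irec->startoff;
		irec->startoff = bno;
	}
	if (irec->startoff + irec->blockcount > end)
		irec->blockcount = end - irec->startoff;
}

int main(void)
{
	struct irec got = { .startoff = 10, .blockcount = 20 };	/* blocks 10..29 */

	trim_extent(&got, 25, 100);	/* punch blocks 25..124 */
	printf("startoff=%llu blockcount=%llu\n",
	       (unsigned long long)got.startoff,
	       (unsigned long long)got.blockcount);	/* 25, 5 */
	return 0;
}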
@@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
 	struct xfs_trans		*tp,
 	struct xfs_getfsmap_info	*info)
 {
-	struct xfs_rtalloc_rec		alow;
-	struct xfs_rtalloc_rec		ahigh;
+	struct xfs_rtalloc_rec		alow = { 0 };
+	struct xfs_rtalloc_rec		ahigh = { 0 };
 	int				error;
 
 	xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
......
@@ -387,7 +387,7 @@ xfs_reserve_blocks(
 	do {
 		free = percpu_counter_sum(&mp->m_fdblocks) -
 						mp->m_alloc_set_aside;
-		if (!free)
+		if (free <= 0)
 			break;
 
 		delta = request - mp->m_resblks;
......
@@ -3236,7 +3236,6 @@ xfs_iflush_cluster(
 	struct xfs_inode	*cip;
 	int			nr_found;
 	int			clcount = 0;
-	int			bufwasdelwri;
 	int			i;
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -3360,37 +3359,22 @@ xfs_iflush_cluster(
 	 * inode buffer and shut down the filesystem.
 	 */
 	rcu_read_unlock();
-	/*
-	 * Clean up the buffer. If it was delwri, just release it --
-	 * brelse can handle it with no problems. If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
-	if (bufwasdelwri)
-		xfs_buf_relse(bp);
-
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 
-	if (!bufwasdelwri) {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them. Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (bp->b_iodone) {
-			bp->b_flags &= ~XBF_DONE;
-			xfs_buf_stale(bp);
-			xfs_buf_ioerror(bp, -EIO);
-			xfs_buf_ioend(bp);
-		} else {
-			xfs_buf_stale(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
 	/*
-	 * Unlocks the flush lock
+	 * We'll always have an inode attached to the buffer for completion
+	 * process by the time we are called from xfs_iflush(). Hence we have
+	 * always need to do IO completion processing to abort the inodes
+	 * attached to the buffer. handle them just like the shutdown case in
+	 * xfs_buf_submit().
 	 */
+	ASSERT(bp->b_iodone);
+	bp->b_flags &= ~XBF_DONE;
+	xfs_buf_stale(bp);
+	xfs_buf_ioerror(bp, -EIO);
+	xfs_buf_ioend(bp);
+
+	/* abort the corrupt inode, as it was not attached to the buffer */
 	xfs_iflush_abort(cip, false);
 	kmem_free(cilist);
 	xfs_perag_put(pag);
@@ -3486,12 +3470,17 @@ xfs_iflush(
 	xfs_log_force(mp, 0);
 
 	/*
-	 * inode clustering:
-	 * see if other inodes can be gathered into this write
+	 * inode clustering: try to gather other inodes into this write
+	 *
+	 * Note: Any error during clustering will result in the filesystem
+	 * being shut down and completion callbacks run on the cluster buffer.
+	 * As we have already flushed and attached this inode to the buffer,
+	 * it has already been aborted and released by xfs_iflush_cluster() and
+	 * so we have no further error handling to do here.
 	 */
 	error = xfs_iflush_cluster(ip, bp);
 	if (error)
-		goto cluster_corrupt_out;
+		return error;
 
 	*bpp = bp;
 	return 0;
@@ -3500,12 +3489,8 @@ xfs_iflush(
 	if (bp)
 		xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-cluster_corrupt_out:
-	error = -EFSCORRUPTED;
 abort_out:
-	/*
-	 * Unlocks the flush lock
-	 */
+	/* abort the corrupt inode, as it was not attached to the buffer */
 	xfs_iflush_abort(ip, false);
 	return error;
 }
......
@@ -963,12 +963,13 @@ xfs_ilock_for_iomap(
 	unsigned		*lockmode)
 {
 	unsigned		mode = XFS_ILOCK_SHARED;
+	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
 
 	/*
 	 * COW writes may allocate delalloc space or convert unwritten COW
 	 * extents, so we need to make sure to take the lock exclusively here.
 	 */
-	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
+	if (xfs_is_reflink_inode(ip) && is_write) {
 		/*
 		 * FIXME: It could still overwrite on unshared extents and not
 		 * need allocation.
@@ -989,6 +990,7 @@ xfs_ilock_for_iomap(
 		mode = XFS_ILOCK_EXCL;
 	}
 
+relock:
 	if (flags & IOMAP_NOWAIT) {
 		if (!xfs_ilock_nowait(ip, mode))
 			return -EAGAIN;
@@ -996,6 +998,17 @@ xfs_ilock_for_iomap(
 		xfs_ilock(ip, mode);
 	}
 
+	/*
+	 * The reflink iflag could have changed since the earlier unlocked
+	 * check, so if we got ILOCK_SHARED for a write and but we're now a
+	 * reflink inode we have to switch to ILOCK_EXCL and relock.
+	 */
+	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
+		xfs_iunlock(ip, mode);
+		mode = XFS_ILOCK_EXCL;
+		goto relock;
+	}
+
 	*lockmode = mode;
 	return 0;
 }
......
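A standalone sketch of the lock-then-recheck pattern the hunk above adds to xfs_ilock_for_iomap() (pthreads in userspace rather than XFS locking primitives; the flag and function names are invented): the reflink state is first tested without holding the lock, so after the shared lock is taken the state must be tested again, and if it changed in the meantime the lock is dropped and retaken exclusively.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t ilock = PTHREAD_RWLOCK_INITIALIZER;
static bool is_reflink;			/* may flip while we are unlocked */

/* Take ilock for a write: shared if possible, exclusive if reflink. */
static bool lock_for_write(void)
{
	bool excl = is_reflink;		/* unlocked first guess */

retry:
	if (excl)
		pthread_rwlock_wrlock(&ilock);
	else
		pthread_rwlock_rdlock(&ilock);

	/* the flag may have changed since the unlocked check: recheck */
	if (!excl && is_reflink) {
		pthread_rwlock_unlock(&ilock);
		excl = true;
		goto retry;
	}
	return excl;
}

int main(void)
{
	bool excl = lock_for_write();

	printf("locked %s\n", excl ? "exclusive" : "shared");
	pthread_rwlock_unlock(&ilock);
	return 0;
}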
@@ -258,7 +258,12 @@ xfs_trans_alloc(
 	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_start_intwrite(mp->m_super);
 
-	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+	/*
+	 * Zero-reservation ("empty") transactions can't modify anything, so
+	 * they're allowed to run while we're frozen.
+	 */
+	WARN_ON(resp->tr_logres > 0 &&
+		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
 	atomic_inc(&mp->m_active_trans);
 	tp = kmem_zone_zalloc(xfs_trans_zone,
......