提交 7fdec82a 编写于 作者: L Linus Torvalds

Merge tag 'xfs-for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs updates from Dave Chinner:
 "There's not a lot in this - the main addition is the CRC validation of
  the entire region of the log that the will be recovered, along with
  several log recovery fixes.  Most of the rest is small bug fixes and
  cleanups.

  I have three bug fixes still pending, all that address recently fixed
  regressions that I will send to next week after they've had some time
  in for-next.

  Summary:
   - extensive CRC validation during log recovery
   - several log recovery bug fixes
   - Various DAX support fixes
   - AGFL size calculation fix
   - various cleanups in preparation for new functionality
   - project quota ENOSPC notification via netlink
   - tracing and debug improvements"

* tag 'xfs-for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (26 commits)
  xfs: handle dquot buffer readahead in log recovery correctly
  xfs: inode recovery readahead can race with inode buffer creation
  xfs: eliminate committed arg from xfs_bmap_finish
  xfs: bmapbt checking on debug kernels too expensive
  xfs: add tracepoints to readpage calls
  xfs: debug mode log record crc error injection
  xfs: detect and trim torn writes during log recovery
  xfs: fix recursive splice read locking with DAX
  xfs: Don't use reserved blocks for data blocks with DAX
  XFS: Use a signed return type for suffix_kstrtoint()
  libxfs: refactor short btree block verification
  libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct
  libxfs: use a convenience variable instead of open-coding the fork
  xfs: fix log ticket type printing
  libxfs: make xfs_alloc_fix_freelist non-static
  xfs: make xfs_buf_ioend_async() static
  xfs: send warning of project quota to userspace via netlink
  xfs: get mp from bma->ip in xfs_bmap code
  xfs: print name of verifier if it fails
  libxfs: Optimize the loop for xfs_bitmap_empty
  ...
......@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
.name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
......@@ -1926,7 +1927,7 @@ xfs_alloc_space_available(
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
STATIC int /* error */
int /* error */
xfs_alloc_fix_freelist(
struct xfs_alloc_arg *args, /* allocation argument structure */
int flags) /* XFS_ALLOC_FLAG_... */
......@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
.name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
......
......@@ -235,5 +235,6 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
#endif /* __XFS_ALLOC_H__ */
......@@ -293,14 +293,7 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
......@@ -311,14 +304,7 @@ xfs_allocbt_verify(
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
......@@ -332,21 +318,7 @@ xfs_allocbt_verify(
return false;
}
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
return false;
/* sibling pointer verification */
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;
return true;
return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
......@@ -379,6 +351,7 @@ xfs_allocbt_write_verify(
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
.name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
......
......@@ -207,7 +207,7 @@ xfs_attr_set(
struct xfs_trans_res tres;
xfs_fsblock_t firstblock;
int rsvd = (flags & ATTR_ROOT) != 0;
int error, err2, committed, local;
int error, err2, local;
XFS_STATS_INC(mp, xs_attr_set);
......@@ -334,24 +334,14 @@ xfs_attr_set(
*/
xfs_bmap_init(args.flist, args.firstblock);
error = xfs_attr_shortform_to_leaf(&args);
if (!error) {
error = xfs_bmap_finish(&args.trans, args.flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args.trans, args.flist, dp);
if (error) {
ASSERT(committed);
args.trans = NULL;
xfs_bmap_cancel(&flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args.trans, dp, 0);
/*
* Commit the leaf transformation. We'll need another (linked)
* transaction to add the new attribute to the leaf.
......@@ -568,7 +558,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
int retval, error, committed, forkoff;
int retval, error, forkoff;
trace_xfs_attr_leaf_addname(args);
......@@ -628,24 +618,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the current trans (including the inode) and start
* a new one.
......@@ -729,25 +709,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (!error) {
if (!error)
error = xfs_bmap_finish(&args->trans,
args->flist,
&committed);
}
args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans
* and started a new one. We need the inode to be
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
}
/*
......@@ -775,7 +744,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
{
xfs_inode_t *dp;
struct xfs_buf *bp;
int error, committed, forkoff;
int error, forkoff;
trace_xfs_attr_leaf_removename(args);
......@@ -803,23 +772,13 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
}
return 0;
}
......@@ -877,7 +836,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
xfs_mount_t *mp;
int committed, retval, error;
int retval, error;
trace_xfs_attr_node_addname(args);
......@@ -938,26 +897,15 @@ xfs_attr_node_addname(xfs_da_args_t *args)
state = NULL;
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_node(args);
if (!error) {
if (!error)
error = xfs_bmap_finish(&args->trans,
args->flist,
&committed);
}
args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans
* and started a new one. We need the inode to be
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the node conversion and start the next
* trans in the chain.
......@@ -977,23 +925,13 @@ xfs_attr_node_addname(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_split(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
......@@ -1086,25 +1024,14 @@ xfs_attr_node_addname(xfs_da_args_t *args)
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
if (!error) {
if (!error)
error = xfs_bmap_finish(&args->trans,
args->flist,
&committed);
}
args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans
* and started a new one. We need the inode to be
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
}
/*
......@@ -1146,7 +1073,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
struct xfs_buf *bp;
int retval, error, committed, forkoff;
int retval, error, forkoff;
trace_xfs_attr_node_removename(args);
......@@ -1220,24 +1147,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (retval && (state->path.active > 1)) {
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_da3_join(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the Btree join operation and start a new trans.
*/
......@@ -1265,25 +1181,14 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
/* bp is gone due to xfs_da_shrink_inode */
if (!error) {
if (!error)
error = xfs_bmap_finish(&args->trans,
args->flist,
&committed);
}
args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
goto out;
}
/*
* bmap_finish() may have committed the last trans
* and started a new one. We need the inode to be
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_trans_brelse(args->trans, bp);
}
......
......@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
.name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
};
......
......@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
.name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
......@@ -447,8 +448,6 @@ xfs_attr_rmtval_set(
* Roll through the "value", allocating blocks on disk as required.
*/
while (blkcnt > 0) {
int committed;
/*
* Allocate a single extent, up to the size of the value.
*
......@@ -466,24 +465,14 @@ xfs_attr_rmtval_set(
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
args->total, &map, &nmap, args->flist);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist, dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
......@@ -614,30 +603,19 @@ xfs_attr_rmtval_remove(
blkcnt = args->rmtblkcnt;
done = 0;
while (!done) {
int committed;
xfs_bmap_init(args->flist, args->firstblock);
error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
XFS_BMAPI_ATTRFORK, 1, args->firstblock,
args->flist, &done);
if (!error) {
if (!error)
error = xfs_bmap_finish(&args->trans, args->flist,
&committed);
}
args->dp);
if (error) {
ASSERT(committed);
args->trans = NULL;
xfs_bmap_cancel(args->flist);
return error;
}
/*
* bmap_finish() may have committed the last trans and started
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, args->dp, 0);
/*
* Close out trans and start the next one in the chain.
*/
......
......@@ -32,13 +32,13 @@ int
xfs_bitmap_empty(uint *map, uint size)
{
uint i;
uint ret = 0;
for (i = 0; i < size; i++) {
ret |= map[i];
if (map[i] != 0)
return 0;
}
return (ret == 0);
return 1;
}
/*
......
......@@ -325,9 +325,11 @@ xfs_check_block(
/*
* Check that the extents for the inode ip are in the right order in all
* btree leaves.
* btree leaves. THis becomes prohibitively expensive for large extent count
* files, so don't bother with inodes that have more than 10,000 extents in
* them. The btree record ordering checks will still be done, so for such large
* bmapbt constructs that is going to catch most corruptions.
*/
STATIC void
xfs_bmap_check_leaf_extents(
xfs_btree_cur_t *cur, /* btree cursor or null */
......@@ -352,6 +354,10 @@ xfs_bmap_check_leaf_extents(
return;
}
/* skip large extent count inodes */
if (ip->i_d.di_nextents > 10000)
return;
bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
......@@ -1111,7 +1117,6 @@ xfs_bmap_add_attrfork(
xfs_trans_t *tp; /* transaction pointer */
int blks; /* space reservation */
int version = 1; /* superblock attr version */
int committed; /* xaction was committed */
int logflags; /* logging flags */
int error; /* error return value */
......@@ -1214,7 +1219,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
error = xfs_bmap_finish(&tp, &flist, &committed);
error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto bmap_cancel;
error = xfs_trans_commit(tp);
......@@ -1723,10 +1728,11 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp=0; /* value for da_new calculations */
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp;
mp = bma->tp ? bma->tp->t_mountp : NULL;
ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
mp = bma->ip->i_mount;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
......@@ -1785,7 +1791,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
......@@ -2016,10 +2022,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
&bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
&bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
......@@ -2100,10 +2106,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
&tmp_rval, XFS_DATA_FORK);
&tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
......@@ -2169,10 +2175,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
1, &tmp_rval, XFS_DATA_FORK);
1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
......@@ -2215,13 +2221,13 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
da_old > 0, &tmp_logflags, XFS_DATA_FORK);
da_old > 0, &tmp_logflags, whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
......@@ -2242,7 +2248,7 @@ xfs_bmap_add_extent_delay_real(
if (bma->cur)
bma->cur->bc_private.b.allocated = 0;
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
bma->logflags |= rval;
return error;
......@@ -2939,7 +2945,7 @@ xfs_bmap_add_extent_hole_real(
int state; /* state bits, accessed thru macros */
struct xfs_mount *mp;
mp = bma->tp ? bma->tp->t_mountp : NULL;
mp = bma->ip->i_mount;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
......@@ -5950,7 +5956,6 @@ xfs_bmap_split_extent(
struct xfs_trans *tp;
struct xfs_bmap_free free_list;
xfs_fsblock_t firstfsb;
int committed;
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
......@@ -5971,7 +5976,7 @@ xfs_bmap_split_extent(
if (error)
goto out;
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out;
......
......@@ -195,7 +195,7 @@ void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
struct xfs_inode *ip);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
......
......@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
.name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
......
......@@ -4080,3 +4080,61 @@ xfs_btree_change_owner(
return 0;
}
/**
* xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block
*
* @bp: buffer containing the btree block
* @max_recs: pointer to the m_*_mxr max records field in the xfs mount
* @pag_max_level: pointer to the per-ag max level field
*/
bool
xfs_btree_sblock_v5hdr_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
return false;
return true;
}
/**
* xfs_btree_sblock_verify() -- verify a short-format btree block
*
* @bp: buffer containing the btree block
* @max_recs: maximum records allowed in this btree node
*/
bool
xfs_btree_sblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return false;
/* sibling pointer verification */
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;
return true;
}
......@@ -472,4 +472,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
#endif /* __XFS_BTREE_H__ */
......@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
.name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
};
......
......@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
.name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
};
......
......@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
.name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
.name = "xfs_dir3_data_reada",
.verify_read = xfs_dir3_data_reada_verify,
.verify_write = xfs_dir3_data_write_verify,
};
......
......@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
}
const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
.name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
.name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
};
......
......@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
.name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
};
......
......@@ -54,7 +54,7 @@ xfs_dqcheck(
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
char *str)
const char *str)
{
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
......@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
STATIC bool
xfs_dquot_buf_verify(
struct xfs_mount *mp,
struct xfs_buf *bp)
struct xfs_buf *bp,
int warn)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
......@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
if (i == 0)
id = be32_to_cpu(ddq->d_id);
error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
"xfs_dquot_buf_verify");
error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
if (error)
return false;
}
......@@ -256,13 +256,32 @@ xfs_dquot_buf_read_verify(
if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
else if (!xfs_dquot_buf_verify(mp, bp))
else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
xfs_verifier_error(bp);
}
/*
* readahead errors are silent and simply leave the buffer as !done so a real
* read will then be run with the xfs_dquot_buf_ops verifier. See
* xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
* reporting the failure.
*/
static void
xfs_dquot_buf_readahead_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp) ||
!xfs_dquot_buf_verify(mp, bp, 0)) {
xfs_buf_ioerror(bp, -EIO);
bp->b_flags &= ~XBF_DONE;
}
}
/*
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
......@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify(mp, bp)) {
if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
......@@ -282,7 +301,13 @@ xfs_dquot_buf_write_verify(
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
.name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
.name = "xfs_dquot_ra",
.verify_read = xfs_dquot_buf_readahead_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
......@@ -786,7 +786,7 @@ typedef struct xfs_agfl {
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
} xfs_agfl_t;
} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
......
......@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
.name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
......
......@@ -221,7 +221,6 @@ xfs_inobt_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
......@@ -237,14 +236,7 @@ xfs_inobt_verify(
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
return false;
if (pag &&
be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
......@@ -254,24 +246,12 @@ xfs_inobt_verify(
return 0;
}
/* numrecs and level verification */
/* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
return false;
/* sibling pointer verification */
if (!block->bb_u.s.bb_leftsib ||
(be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
return false;
if (!block->bb_u.s.bb_rightsib ||
(be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
return false;
return true;
return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
static void
......@@ -304,6 +284,7 @@ xfs_inobt_write_verify(
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
.name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
......
......@@ -62,11 +62,14 @@ xfs_inobp_check(
* has not had the inode cores stamped into it. Hence for readahead, the buffer
* may be potentially invalid.
*
* If the readahead buffer is invalid, we don't want to mark it with an error,
* but we do want to clear the DONE status of the buffer so that a followup read
* will re-read it from disk. This will ensure that we don't get an unnecessary
* warnings during log recovery and we don't get unnecssary panics on debug
* kernels.
* If the readahead buffer is invalid, we need to mark it with an error and
* clear the DONE status of the buffer so that a followup read will re-read it
* from disk. We don't report the error otherwise to avoid warnings during log
* recovery and we don't get unnecssary panics on debug kernels. We use EIO here
* because all we want to do is say readahead failed; there is no-one to report
* the error to, so this will distinguish it from a non-ra verifier failure.
* Changes to this readahead error behavour also need to be reflected in
* xfs_dquot_buf_readahead_verify().
*/
static void
xfs_inode_buf_verify(
......@@ -93,6 +96,7 @@ xfs_inode_buf_verify(
XFS_RANDOM_ITOBP_INOTOBP))) {
if (readahead) {
bp->b_flags &= ~XBF_DONE;
xfs_buf_ioerror(bp, -EIO);
return;
}
......@@ -132,11 +136,13 @@ xfs_inode_buf_write_verify(
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
.name = "xfs_inode",
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
.name = "xxfs_inode_ra",
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
......
......@@ -60,6 +60,7 @@ typedef struct xlog_recover {
*/
#define XLOG_BC_TABLE_SIZE 64
#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
......
......@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
xfs_dqid_t id, uint type, uint flags, char *str);
xfs_dqid_t id, uint type, uint flags, const char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */
......@@ -679,11 +679,13 @@ xfs_sb_write_verify(
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
.name = "xfs_sb",
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
.name = "xfs_sb_quiet",
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
......
......@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
......
......@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
.name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
......
......@@ -1917,6 +1917,7 @@ xfs_vm_readpage(
struct file *unused,
struct page *page)
{
trace_xfs_vm_readpage(page->mapping->host, 1);
return mpage_readpage(page, xfs_get_blocks);
}
......@@ -1927,6 +1928,7 @@ xfs_vm_readpages(
struct list_head *pages,
unsigned nr_pages)
{
trace_xfs_vm_readpages(mapping->host, nr_pages);
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
......
......@@ -91,32 +91,32 @@ xfs_zero_extent(
* last due to locking considerations. We never free any extents in
* the first transaction.
*
* Return 1 if the given transaction was committed and a new one
* started, and 0 otherwise in the committed parameter.
* If an inode *ip is provided, rejoin it to the transaction if
* the transaction was committed.
*/
int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
struct xfs_bmap_free *flist, /* i/o: list extents to free */
int *committed)/* xact committed or not */
struct xfs_inode *ip)
{
struct xfs_efd_log_item *efd; /* extent free data */
struct xfs_efi_log_item *efi; /* extent free intention */
int error; /* error return value */
int committed;/* xact committed or not */
struct xfs_bmap_free_item *free; /* free extent item */
struct xfs_bmap_free_item *next; /* next item on free list */
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
if (flist->xbf_count == 0) {
*committed = 0;
if (flist->xbf_count == 0)
return 0;
}
efi = xfs_trans_get_efi(*tp, flist->xbf_count);
for (free = flist->xbf_first; free; free = free->xbfi_next)
xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
free->xbfi_blockcount);
error = __xfs_trans_roll(tp, NULL, committed);
error = __xfs_trans_roll(tp, ip, &committed);
if (error) {
/*
* If the transaction was committed, drop the EFD reference
......@@ -128,16 +128,13 @@ xfs_bmap_finish(
* transaction so we should return committed=1 even though we're
* returning an error.
*/
if (*committed) {
if (committed) {
xfs_efi_release(efi);
xfs_force_shutdown((*tp)->t_mountp,
(error == -EFSCORRUPTED) ?
SHUTDOWN_CORRUPT_INCORE :
SHUTDOWN_META_IO_ERROR);
} else {
*committed = 1;
}
return error;
}
......@@ -969,7 +966,6 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
int committed;
int error;
trace_xfs_alloc_file_space(ip);
......@@ -1064,23 +1060,20 @@ xfs_alloc_file_space(
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
allocatesize_fsb, alloc_type, &firstfsb,
resblks, imapp, &nimaps, &free_list);
if (error) {
if (error)
goto error0;
}
/*
* Complete the transaction
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto error0;
}
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
if (error) {
if (error)
break;
}
allocated_fsb = imapp->br_blockcount;
......@@ -1206,7 +1199,6 @@ xfs_free_file_space(
xfs_off_t offset,
xfs_off_t len)
{
int committed;
int done;
xfs_fileoff_t endoffset_fsb;
int error;
......@@ -1346,17 +1338,15 @@ xfs_free_file_space(
error = xfs_bunmapi(tp, ip, startoffset_fsb,
endoffset_fsb - startoffset_fsb,
0, 2, &firstfsb, &free_list, &done);
if (error) {
if (error)
goto error0;
}
/*
* complete the transaction
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto error0;
}
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
......@@ -1434,7 +1424,6 @@ xfs_shift_file_space(
int error;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
int committed;
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
......@@ -1526,7 +1515,7 @@ xfs_shift_file_space(
if (error)
goto out_bmap_cancel;
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
......
......@@ -604,6 +604,13 @@ xfs_buf_get_map(
}
}
/*
* Clear b_error if this is a lookup from a caller that doesn't expect
* valid data to be found in the buffer.
*/
if (!(flags & XBF_READ))
xfs_buf_ioerror(bp, 0);
XFS_STATS_INC(target->bt_mount, xb_get);
trace_xfs_buf_get(bp, flags, _RET_IP_);
return bp;
......@@ -1045,7 +1052,7 @@ xfs_buf_ioend_work(
xfs_buf_ioend(bp);
}
void
static void
xfs_buf_ioend_async(
struct xfs_buf *bp)
{
......
......@@ -132,6 +132,7 @@ struct xfs_buf_map {
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
char *name;
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
......
......@@ -306,7 +306,7 @@ xfs_qm_dqalloc(
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
xfs_bmbt_irec_t map;
int nmaps, error, committed;
int nmaps, error;
xfs_buf_t *bp;
xfs_trans_t *tp = *tpp;
......@@ -379,11 +379,12 @@ xfs_qm_dqalloc(
xfs_trans_bhold(tp, bp);
if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
error = xfs_bmap_finish(tpp, &flist, NULL);
if (error)
goto error1;
}
if (committed) {
/* Transaction was committed? */
if (*tpp != tp) {
tp = *tpp;
xfs_trans_bjoin(tp, bp);
} else {
......@@ -393,9 +394,9 @@ xfs_qm_dqalloc(
*O_bpp = bp;
return 0;
error1:
error1:
xfs_bmap_cancel(&flist);
error0:
error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return error;
......
......@@ -164,9 +164,9 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
__return_address, bp->b_bn);
__return_address, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
......
......@@ -402,19 +402,26 @@ xfs_file_splice_read(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
/* for dax, we need to avoid the page cache */
if (IS_DAX(VFS_I(ip)))
ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
else
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
/*
* DAX inodes cannot ues the page cache for splice, so we have to push
* them through the VFS IO path. This means it goes through
* ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
* cannot lock the splice operation at this level for DAX inodes.
*/
if (IS_DAX(VFS_I(ip))) {
ret = default_file_splice_read(infilp, ppos, pipe, count,
flags);
goto out;
}
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
out:
if (ret > 0)
XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
return ret;
}
......
......@@ -1143,7 +1143,6 @@ xfs_create(
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
int committed;
prid_t prid;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
......@@ -1226,7 +1225,7 @@ xfs_create(
* pointing to itself.
*/
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
prid, resblks > 0, &ip, &committed);
prid, resblks > 0, &ip, NULL);
if (error)
goto out_trans_cancel;
......@@ -1275,7 +1274,7 @@ xfs_create(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
......@@ -1427,7 +1426,6 @@ xfs_link(
int error;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
int resblks;
trace_xfs_link(tdp, target_name);
......@@ -1502,11 +1500,10 @@ xfs_link(
* link transaction goes to disk before returning to
* the user.
*/
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
}
error = xfs_bmap_finish (&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_bmap_cancel(&free_list);
goto error_return;
......@@ -1555,7 +1552,6 @@ xfs_itruncate_extents(
xfs_fileoff_t first_unmap_block;
xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
int committed;
int error = 0;
int done = 0;
......@@ -1601,9 +1597,7 @@ xfs_itruncate_extents(
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (committed)
xfs_trans_ijoin(tp, ip, 0);
error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto out_bmap_cancel;
......@@ -1774,7 +1768,6 @@ xfs_inactive_ifree(
{
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error;
......@@ -1841,7 +1834,7 @@ xfs_inactive_ifree(
* Just ignore errors at this point. There is nothing we can do except
* to try to keep going. Make sure it's not a silent error.
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error) {
xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
__func__, error);
......@@ -2523,7 +2516,6 @@ xfs_remove(
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
uint resblks;
trace_xfs_remove(dp, name);
......@@ -2624,7 +2616,7 @@ xfs_remove(
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
......@@ -2701,7 +2693,6 @@ xfs_finish_rename(
struct xfs_trans *tp,
struct xfs_bmap_free *free_list)
{
int committed = 0;
int error;
/*
......@@ -2711,7 +2702,7 @@ xfs_finish_rename(
if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
error = xfs_bmap_finish(&tp, free_list, &committed);
error = xfs_bmap_finish(&tp, free_list, NULL);
if (error) {
xfs_bmap_cancel(free_list);
xfs_trans_cancel(tp);
......
......@@ -129,7 +129,6 @@ xfs_iomap_write_direct(
xfs_trans_t *tp;
xfs_bmap_free_t free_list;
uint qblocks, resblks, resrtextents;
int committed;
int error;
int lockmode;
int bmapi_flags = XFS_BMAPI_PREALLOC;
......@@ -203,15 +202,20 @@ xfs_iomap_write_direct(
* this outside the transaction context, but if we commit and then crash
* we may not have zeroed the blocks and this will be exposed on
* recovery of the allocation. Hence we must zero before commit.
*
* Further, if we are mapping unwritten extents here, we need to zero
* and convert them to written so that we don't need an unwritten extent
* callback for DAX. This also means that we need to be able to dip into
* the reserve block pool if there is no space left but we need to do
* unwritten extent conversion.
* the reserve block pool for bmbt block allocation if there is no space
* left but we need to do unwritten extent conversion.
*/
if (IS_DAX(VFS_I(ip))) {
bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
tp->t_flags |= XFS_TRANS_RESERVE;
if (ISUNWRITTEN(imap)) {
tp->t_flags |= XFS_TRANS_RESERVE;
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
}
}
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
resblks, resrtextents);
......@@ -247,7 +251,7 @@ xfs_iomap_write_direct(
/*
* Complete the transaction
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
......@@ -693,7 +697,7 @@ xfs_iomap_write_allocate(
xfs_bmap_free_t free_list;
xfs_filblks_t count_fsb;
xfs_trans_t *tp;
int nimaps, committed;
int nimaps;
int error = 0;
int nres;
......@@ -794,7 +798,7 @@ xfs_iomap_write_allocate(
if (error)
goto trans_cancel;
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto trans_cancel;
......@@ -852,7 +856,6 @@ xfs_iomap_write_unwritten(
xfs_bmap_free_t free_list;
xfs_fsize_t i_size;
uint resblks;
int committed;
int error;
trace_xfs_unwritten_convert(ip, offset, count);
......@@ -924,7 +927,7 @@ xfs_iomap_write_unwritten(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto error_on_bmapi_transaction;
......
......@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
int aborted = 0;
/*
* Race to shutdown the filesystem if we see an error.
* Race to shutdown the filesystem if we see an error or the iclog is in
* IOABORT state. The IOABORT state is only set in DEBUG mode to inject
* CRC errors into log recovery.
*/
if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
XFS_RANDOM_IODONE_IOERR) ||
iclog->ic_state & XLOG_STATE_IOABORT) {
if (iclog->ic_state & XLOG_STATE_IOABORT)
iclog->ic_state &= ~XLOG_STATE_IOABORT;
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
......@@ -1838,6 +1844,23 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
#ifdef DEBUG
/*
* Intentionally corrupt the log record CRC based on the error injection
* frequency, if defined. This facilitates testing log recovery in the
* event of torn writes. Hence, set the IOABORT state to abort the log
* write on I/O completion and shutdown the fs. The subsequent mount
* detects the bad CRC and attempts to recover.
*/
if (log->l_badcrc_factor &&
(prandom_u32() % log->l_badcrc_factor == 0)) {
iclog->ic_header.h_crc &= 0xAAAAAAAA;
iclog->ic_state |= XLOG_STATE_IOABORT;
xfs_warn(log->l_mp,
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
be64_to_cpu(iclog->ic_header.h_lsn));
}
#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
......@@ -2045,12 +2068,14 @@ xlog_print_tic_res(
"QM_DQCLUSTER",
"QM_QINOCREATE",
"QM_QUOTAOFF_END",
"SB_UNIT",
"FSYNC_TS",
"GROWFSRT_ALLOC",
"GROWFSRT_ZERO",
"GROWFSRT_FREE",
"SWAPEXT"
"SWAPEXT",
"CHECKPOINT",
"ICREATE",
"CREATE_TMPFILE"
};
xfs_warn(mp, "xlog_write: reservation summary:");
......@@ -2791,11 +2816,19 @@ xlog_state_do_callback(
}
} while (!ioerrors && loopdidcallbacks);
#ifdef DEBUG
/*
* make one last gasp attempt to see if iclogs are being left in
* limbo..
* Make one last gasp attempt to see if iclogs are being left in limbo.
* If the above loop finds an iclog earlier than the current iclog and
* in one of the syncing states, the current iclog is put into
* DO_CALLBACK and the callbacks are deferred to the completion of the
* earlier iclog. Walk the iclogs in order and make sure that no iclog
* is in DO_CALLBACK unless an earlier iclog is in one of the syncing
* states.
*
* Note that SYNCING|IOABORT is a valid state so we cannot just check
* for ic_state == SYNCING.
*/
#ifdef DEBUG
if (funcdidcallbacks) {
first_iclog = iclog = log->l_iclog;
do {
......@@ -2810,7 +2843,7 @@ xlog_state_do_callback(
* IOERROR - give up hope all ye who enter here
*/
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
iclog->ic_state == XLOG_STATE_SYNCING ||
iclog->ic_state & XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
iclog->ic_state == XLOG_STATE_IOERROR )
break;
......
......@@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
......@@ -410,6 +411,8 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
/* log record crc error injection factor */
uint32_t l_badcrc_factor;
#endif
};
......
此差异已折叠。
......@@ -766,7 +766,6 @@ xfs_growfs_rt_alloc(
{
xfs_fileoff_t bno; /* block number in file */
struct xfs_buf *bp; /* temporary buffer for zeroing */
int committed; /* transaction committed flag */
xfs_daddr_t d; /* disk block address */
int error; /* error return value */
xfs_fsblock_t firstblock;/* first block allocated in xaction */
......@@ -811,7 +810,7 @@ xfs_growfs_rt_alloc(
/*
* Free any blocks freed up in the transaction, then commit.
*/
error = xfs_bmap_finish(&tp, &flist, &committed);
error = xfs_bmap_finish(&tp, &flist, NULL);
if (error)
goto out_bmap_cancel;
error = xfs_trans_commit(tp);
......
......@@ -137,7 +137,7 @@ static const match_table_t tokens = {
};
STATIC unsigned long
STATIC int
suffix_kstrtoint(char *s, unsigned int base, int *res)
{
int last, shift_left_factor = 0, _res;
......
......@@ -178,7 +178,6 @@ xfs_symlink(
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
int committed;
xfs_fileoff_t first_fsb;
xfs_filblks_t fs_blocks;
int nmaps;
......@@ -387,7 +386,7 @@ xfs_symlink(
xfs_trans_set_sync(tp);
}
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, NULL);
if (error)
goto out_bmap_cancel;
......@@ -434,7 +433,6 @@ xfs_inactive_symlink_rmt(
struct xfs_inode *ip)
{
xfs_buf_t *bp;
int committed;
int done;
int error;
xfs_fsblock_t first_block;
......@@ -510,15 +508,9 @@ xfs_inactive_symlink_rmt(
/*
* Commit the first transaction. This logs the EFI and the inode.
*/
error = xfs_bmap_finish(&tp, &free_list, &committed);
error = xfs_bmap_finish(&tp, &free_list, ip);
if (error)
goto error_bmap_cancel;
/*
* The transaction must have been committed, since there were
* actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
* The new tp has the extent freeing and EFDs.
*/
ASSERT(committed);
/*
* The first xact was committed, so add the inode to the new one.
* Mark it dirty so it will be logged and moved forward in the log as
......
......@@ -255,11 +255,47 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
#ifdef DEBUG
STATIC ssize_t
log_badcrc_factor_store(
struct kobject *kobject,
const char *buf,
size_t count)
{
struct xlog *log = to_xlog(kobject);
int ret;
uint32_t val;
ret = kstrtouint(buf, 0, &val);
if (ret)
return ret;
log->l_badcrc_factor = val;
return count;
}
STATIC ssize_t
log_badcrc_factor_show(
struct kobject *kobject,
char *buf)
{
struct xlog *log = to_xlog(kobject);
return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
}
XFS_SYSFS_ATTR_RW(log_badcrc_factor);
#endif /* DEBUG */
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
#ifdef DEBUG
ATTR_LIST(log_badcrc_factor),
#endif
NULL,
};
......
......@@ -1222,6 +1222,32 @@ DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
DECLARE_EVENT_CLASS(xfs_readpage_class,
TP_PROTO(struct inode *inode, int nr_pages),
TP_ARGS(inode, nr_pages),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(int, nr_pages)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_pages = nr_pages;
),
TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->nr_pages)
)
#define DEFINE_READPAGE_EVENT(name) \
DEFINE_EVENT(xfs_readpage_class, name, \
TP_PROTO(struct inode *inode, int nr_pages), \
TP_ARGS(inode, nr_pages))
DEFINE_READPAGE_EVENT(xfs_vm_readpage);
DEFINE_READPAGE_EVENT(xfs_vm_readpages);
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int type, struct xfs_bmbt_irec *irec),
......
......@@ -572,12 +572,16 @@ xfs_quota_warn(
struct xfs_dquot *dqp,
int type)
{
/* no warnings for project quotas - we just return ENOSPC later */
enum quota_type qtype;
if (dqp->dq_flags & XFS_DQ_PROJ)
return;
quota_send_warning(make_kqid(&init_user_ns,
(dqp->dq_flags & XFS_DQ_USER) ?
USRQUOTA : GRPQUOTA,
qtype = PRJQUOTA;
else if (dqp->dq_flags & XFS_DQ_USER)
qtype = USRQUOTA;
else
qtype = GRPQUOTA;
quota_send_warning(make_kqid(&init_user_ns, qtype,
be32_to_cpu(dqp->q_core.d_id)),
mp->m_super->s_dev, type);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册