Commit 3f1c64f4 authored by Linus Torvalds

Merge tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs

Pull xfs update from Ben Myers:
 "There is plenty going on, including the cleanup of xfssyncd, metadata
  verifiers, CRC infrastructure for the log, tracking of inodes with
  speculative allocation, a cleanup of xfs_fs_subr.c, fixes for
  XFS_IOC_ZERO_RANGE, an important fix related to log replay (only
  update the last_sync_lsn when a transaction completes), a fix for
  deadlock on AGF buffers, documentation and comment updates, and a few
  more cleanups and fixes.

  Details:
   - remove the xfssyncd mess
   - only update the last_sync_lsn when a transaction completes
   - zero allocation_args on the kernel stack
   - fix AGF/alloc workqueue deadlock
   - silence uninitialised f.file warning
   - Update inode alloc comments
   - Update mount options documentation
   - report projid32bit feature in geometry call
   - speculative preallocation inode tracking
   - fix attr tree double split corruption
   - fix broken error handling in xfs_vm_writepage
   - drop buffer io reference when a bad bio is built
   - add more attribute tree trace points
   - growfs infrastructure changes for 3.8
   - fs/xfs/xfs_fs_subr.c die die die
   - add CRC infrastructure
   - add CRC checks to the log
   - Remove description of nodelaylog mount option from xfs.txt
   - inode allocation should use unmapped buffers
   - byte range granularity for XFS_IOC_ZERO_RANGE
   - fix direct IO nested transaction deadlock
   - fix stray dquot unlock when reclaiming dquots
   - fix sparse reported log CRC endian issue"

Fix up trivial conflict in fs/xfs/xfs_fsops.c due to the same patch
having been applied twice (commits eaef8543 and 1375cb65: "xfs:
growfs: don't read garbage for new secondary superblocks") with later
updates to the affected code in the XFS tree.

* tag 'for-linus-v3.8-rc1' of git://oss.sgi.com/xfs/xfs: (78 commits)
  xfs: fix sparse reported log CRC endian issue
  xfs: fix stray dquot unlock when reclaiming dquots
  xfs: fix direct IO nested transaction deadlock.
  xfs: byte range granularity for XFS_IOC_ZERO_RANGE
  xfs: inode allocation should use unmapped buffers.
  xfs: Remove the description of nodelaylog mount option from xfs.txt
  xfs: add CRC checks to the log
  xfs: add CRC infrastructure
  xfs: convert buffer verifiers to an ops structure.
  xfs: connect up write verifiers to new buffers
  xfs: add pre-write metadata buffer verifier callbacks
  xfs: add buffer pre-write callback
  xfs: Add verifiers to dir2 data readahead.
  xfs: add xfs_da_node verification
  xfs: factor and verify attr leaf reads
  xfs: factor dir2 leaf read
  xfs: factor out dir2 data block reading
  xfs: factor dir2 free block reading
  xfs: verify dir2 block format buffers
  xfs: factor dir2 block read operations
  ...
......@@ -43,7 +43,7 @@ When mounting an XFS filesystem, the following options are accepted.
Issue command to let the block device reclaim space freed by the
filesystem. This is useful for SSD devices, thinly provisioned
LUNs and virtual machine images, but may have a performance
impact. This option is incompatible with the nodelaylog option.
impact.
dmapi
Enable the DMAPI (Data Management API) event callouts.
......@@ -72,8 +72,15 @@ When mounting an XFS filesystem, the following options are accepted.
Indicates that XFS is allowed to create inodes at any location
in the filesystem, including those which will result in inode
numbers occupying more than 32 bits of significance. This is
provided for backwards compatibility, but causes problems for
backup applications that cannot handle large inode numbers.
the default allocation option. Applications that do not handle
inode numbers bigger than 32 bits should use the inode32 option.
inode32
Indicates that XFS is limited to creating inodes at locations which
will not result in inode numbers with more than 32 bits of
significance. This is provided for backwards compatibility, since
64-bit inode numbers might cause problems for some applications
that cannot handle large inode numbers.
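As with any other XFS mount option, inode32/inode64 is passed either
via mount -o or through the data argument of the mount(2) syscall. A
minimal hedged sketch in C (the device and mountpoint paths are
hypothetical, and it must run with CAP_SYS_ADMIN):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Equivalent to: mount -o inode32 /dev/sdb1 /mnt/scratch
	 * (paths are made up for illustration). */
	if (mount("/dev/sdb1", "/mnt/scratch", "xfs", 0, "inode32") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}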
largeio/nolargeio
If "nolargeio" is specified, the optimal I/O reported in
......
......@@ -2,6 +2,7 @@ config XFS_FS
tristate "XFS filesystem support"
depends on BLOCK
select EXPORTFS
select LIBCRC32C
help
XFS is a high performance journaling filesystem which originated
on the SGI IRIX platform. It is completely multi-threaded, can
......
......@@ -37,9 +37,8 @@ xfs-y += xfs_aops.o \
xfs_file.o \
xfs_filestream.o \
xfs_fsops.o \
xfs_fs_subr.o \
xfs_globals.o \
xfs_iget.o \
xfs_icache.o \
xfs_ioctl.o \
xfs_iomap.o \
xfs_iops.o \
......@@ -47,7 +46,6 @@ xfs-y += xfs_aops.o \
xfs_message.o \
xfs_mru_cache.o \
xfs_super.o \
xfs_sync.o \
xfs_xattr.o \
xfs_rename.o \
xfs_utils.o \
......
......@@ -26,4 +26,10 @@ extern int uuid_is_nil(uuid_t *uuid);
extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
static inline void
uuid_copy(uuid_t *dst, uuid_t *src)
{
memcpy(dst, src, sizeof(uuid_t));
}
#endif /* __XFS_SUPPORT_UUID_H__ */
......@@ -108,6 +108,8 @@ typedef struct xfs_agf {
extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
extern const struct xfs_buf_ops xfs_agf_buf_ops;
/*
* Size of the unlinked inode hash table in the agi.
*/
......@@ -161,6 +163,8 @@ typedef struct xfs_agi {
extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
extern const struct xfs_buf_ops xfs_agi_buf_ops;
/*
* The third a.g. block contains the a.g. freelist, an array
* of block pointers to blocks owned by the allocation btree code.
......@@ -233,6 +237,7 @@ typedef struct xfs_perag {
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
......
......@@ -430,6 +430,60 @@ xfs_alloc_fixup_trees(
return 0;
}
static void
xfs_agfl_verify(
struct xfs_buf *bp)
{
#ifdef WHEN_CRCS_COME_ALONG
/*
* we cannot actually do any verification of the AGFL because mkfs does
* not initialise the AGFL to zero or NULL. Hence the only valid part of
* the AGFL is what the AGF says is active. We can't get to the AGF, so
* we can't verify just those entries are valid.
*
* This problem goes away when the CRC format change comes along as that
* requires the AGFL to be initialised by mkfs. At that point, we can
* verify the blocks in the agfl -active or not- lie within the bounds
* of the AG. Until then, just leave this check ifdef'd out.
*/
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
int agfl_ok = 1;
int i;
for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK ||
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
agfl_ok = 0;
}
if (!agfl_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
#endif
}
static void
xfs_agfl_write_verify(
struct xfs_buf *bp)
{
xfs_agfl_verify(bp);
}
static void
xfs_agfl_read_verify(
struct xfs_buf *bp)
{
xfs_agfl_verify(bp);
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
/*
* Read in the allocation group free block array.
*/
......@@ -447,7 +501,7 @@ xfs_alloc_read_agfl(
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp);
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (error)
return error;
ASSERT(!xfs_buf_geterror(bp));
......@@ -2091,6 +2145,63 @@ xfs_alloc_put_freelist(
return 0;
}
static void
xfs_agf_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agf *agf;
int agf_ok;
agf = XFS_BUF_TO_AGF(bp);
agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
if (bp->b_pag)
agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) ==
bp->b_pag->pag_agno;
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_agf_read_verify(
struct xfs_buf *bp)
{
xfs_agf_verify(bp);
}
static void
xfs_agf_write_verify(
struct xfs_buf *bp)
{
xfs_agf_verify(bp);
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
/*
* Read in the allocation group header (free/alloc section).
*/
......@@ -2102,44 +2213,19 @@ xfs_read_agf(
int flags, /* XFS_BUF_ */
struct xfs_buf **bpp) /* buffer for the ag freelist header */
{
struct xfs_agf *agf; /* ag freelist header */
int agf_ok; /* set if agf is consistent */
int error;
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, bpp);
XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
if (error)
return error;
if (!*bpp)
return 0;
ASSERT(!(*bpp)->b_error);
agf = XFS_BUF_TO_AGF(*bpp);
/*
* Validate the magic number of the agf block.
*/
agf_ok =
agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_seqno) == agno;
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
be32_to_cpu(agf->agf_length);
if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
XFS_RANDOM_ALLOC_READ_AGF))) {
XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
XFS_ERRLEVEL_LOW, mp, agf);
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
xfs_buf_set_ref(*bpp, XFS_AGF_REF);
return 0;
}
......
......@@ -231,4 +231,7 @@ xfs_alloc_get_rec(
xfs_extlen_t *len, /* output: length of extent */
int *stat); /* output: success/failure */
extern const struct xfs_buf_ops xfs_agf_buf_ops;
extern const struct xfs_buf_ops xfs_agfl_buf_ops;
#endif /* __XFS_ALLOC_H__ */
......@@ -272,6 +272,82 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
static void
xfs_allocbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
unsigned int level;
int sblock_ok; /* block passes checks */
/*
* magic number and level verification
*
* During growfs operations, we can't verify the exact level as the
* perag is not fully initialised and hence not attached to the buffer.
* In this case, check against the maximum tree depth.
*/
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_MAGIC):
if (pag)
sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
else
sblock_ok = level < mp->m_ag_maxlevels;
break;
case cpu_to_be32(XFS_ABTC_MAGIC):
if (pag)
sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
else
sblock_ok = level < mp->m_ag_maxlevels;
break;
default:
sblock_ok = 0;
break;
}
/* numrecs verification */
sblock_ok = sblock_ok &&
be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
/* sibling pointer verification */
sblock_ok = sblock_ok &&
(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_leftsib &&
(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_rightsib;
if (!sblock_ok) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
}
static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
xfs_allocbt_verify(bp);
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
#ifdef DEBUG
STATIC int
xfs_allocbt_keys_inorder(
......@@ -327,6 +403,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
.key_diff = xfs_allocbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
#ifdef DEBUG
.keys_inorder = xfs_allocbt_keys_inorder,
.recs_inorder = xfs_allocbt_recs_inorder,
......
......@@ -93,4 +93,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
xfs_agnumber_t, xfs_btnum_t);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
#endif /* __XFS_ALLOC_BTREE_H__ */
......@@ -124,7 +124,7 @@ xfs_setfilesize_trans_alloc(
ioend->io_append_trans = tp;
/*
* We will pass freeze protection with a transaction. So tell lockdep
* We may pass freeze protection with a transaction. So tell lockdep
* we released it.
*/
rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
......@@ -149,11 +149,13 @@ xfs_setfilesize(
xfs_fsize_t isize;
/*
* The transaction was allocated in the I/O submission thread,
 * thus we need to mark ourselves as being in a transaction
* manually.
* The transaction may have been allocated in the I/O submission thread,
 * thus we need to mark ourselves as being in a transaction manually.
* Similarly for freeze protection.
*/
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
......@@ -187,7 +189,8 @@ xfs_finish_ioend(
if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (ioend->io_append_trans)
else if (ioend->io_append_trans ||
(ioend->io_isdirect && xfs_ioend_is_append(ioend)))
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
......@@ -205,15 +208,6 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
if (ioend->io_append_trans) {
/*
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
rwsem_acquire_read(
&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
}
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ioend->io_error = -EIO;
goto done;
......@@ -226,35 +220,31 @@ xfs_end_io(
 * range to normal written extents after the data I/O has finished.
*/
if (ioend->io_type == XFS_IO_UNWRITTEN) {
/*
* For buffered I/O we never preallocate a transaction when
* doing the unwritten extent conversion, but for direct I/O
* we do not know if we are converting an unwritten extent
* or not at the point where we preallocate the transaction.
*/
if (ioend->io_append_trans) {
ASSERT(ioend->io_isdirect);
current_set_flags_nested(
&ioend->io_append_trans->t_pflags, PF_FSTRANS);
xfs_trans_cancel(ioend->io_append_trans, 0);
}
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
if (error) {
ioend->io_error = -error;
} else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
/*
* For direct I/O we do not know if we need to allocate blocks
* or not so we can't preallocate an append transaction as that
* results in nested reservations and log space deadlocks. Hence
* allocate the transaction here. While this is sub-optimal and
* can block IO completion for some time, we're stuck with doing
* it this way until we can pass the ioend to the direct IO
* allocation callbacks and avoid nesting that way.
*/
error = xfs_setfilesize_trans_alloc(ioend);
if (error)
goto done;
}
error = xfs_setfilesize(ioend);
} else if (ioend->io_append_trans) {
error = xfs_setfilesize(ioend);
if (error)
ioend->io_error = -error;
} else {
ASSERT(!xfs_ioend_is_append(ioend));
}
done:
if (error)
ioend->io_error = -error;
xfs_destroy_ioend(ioend);
}
......@@ -1432,25 +1422,21 @@ xfs_vm_direct_IO(
size_t size = iov_length(iov, nr_segs);
/*
* We need to preallocate a transaction for a size update
* here. In the case that this write both updates the size
 * and converts at least one unwritten extent we will cancel
* the still clean transaction after the I/O has finished.
* We cannot preallocate a size update transaction here as we
* don't know whether allocation is necessary or not. Hence we
* can only tell IO completion that one is necessary if we are
* not doing unwritten extent conversion.
*/
iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
if (offset + size > XFS_I(inode)->i_d.di_size) {
ret = xfs_setfilesize_trans_alloc(ioend);
if (ret)
goto out_destroy_ioend;
if (offset + size > XFS_I(inode)->i_d.di_size)
ioend->io_isdirect = 1;
}
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
xfs_get_blocks_direct,
xfs_end_io_direct_write, NULL, 0);
if (ret != -EIOCBQUEUED && iocb->private)
goto out_trans_cancel;
goto out_destroy_ioend;
} else {
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
......@@ -1460,15 +1446,6 @@ xfs_vm_direct_IO(
return ret;
out_trans_cancel:
if (ioend->io_append_trans) {
current_set_flags_nested(&ioend->io_append_trans->t_pflags,
PF_FSTRANS);
rwsem_acquire_read(
&inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
xfs_trans_cancel(ioend->io_append_trans, 0);
}
out_destroy_ioend:
xfs_destroy_ioend(ioend);
return ret;
......@@ -1641,7 +1618,7 @@ xfs_vm_bmap(
trace_xfs_vm_bmap(XFS_I(inode));
xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
filemap_write_and_wait(mapping);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return generic_block_bmap(mapping, block, xfs_get_blocks);
}
......
......@@ -903,11 +903,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
dp = args->dp;
args->blkno = 0;
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
XFS_ATTR_FORK);
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return(error);
ASSERT(bp != NULL);
return error;
/*
* Look up the given attribute in the leaf block. Figure out if
......@@ -1031,12 +1029,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* Read in the block containing the "old" attr, then
* remove the "old" attr from that block (neat, huh!)
*/
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
&bp, XFS_ATTR_FORK);
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno,
-1, &bp);
if (error)
return(error);
ASSERT(bp != NULL);
(void)xfs_attr_leaf_remove(bp, args);
return error;
xfs_attr_leaf_remove(bp, args);
/*
* If the result is small enough, shrink it all into the inode.
......@@ -1100,20 +1098,17 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
*/
dp = args->dp;
args->blkno = 0;
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error) {
return(error);
}
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return error;
ASSERT(bp != NULL);
error = xfs_attr_leaf_lookup_int(bp, args);
if (error == ENOATTR) {
xfs_trans_brelse(args->trans, bp);
return(error);
}
(void)xfs_attr_leaf_remove(bp, args);
xfs_attr_leaf_remove(bp, args);
/*
* If the result is small enough, shrink it all into the inode.
......@@ -1155,12 +1150,12 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
struct xfs_buf *bp;
int error;
trace_xfs_attr_leaf_get(args);
args->blkno = 0;
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
XFS_ATTR_FORK);
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return(error);
ASSERT(bp != NULL);
return error;
error = xfs_attr_leaf_lookup_int(bp, args);
if (error != EEXIST) {
......@@ -1181,22 +1176,15 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
STATIC int
xfs_attr_leaf_list(xfs_attr_list_context_t *context)
{
xfs_attr_leafblock_t *leaf;
int error;
struct xfs_buf *bp;
trace_xfs_attr_leaf_list(context);
context->cursor->blkno = 0;
error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
error = xfs_attr_leaf_read(NULL, context->dp, 0, -1, &bp);
if (error)
return XFS_ERROR(error);
ASSERT(bp != NULL);
leaf = bp->b_addr;
if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
xfs_trans_brelse(NULL, bp);
return XFS_ERROR(EFSCORRUPTED);
}
error = xfs_attr_leaf_list_int(bp, context);
xfs_trans_brelse(NULL, bp);
......@@ -1600,12 +1588,9 @@ xfs_attr_node_removename(xfs_da_args_t *args)
ASSERT(state->path.blk[0].bp);
state->path.blk[0].bp = NULL;
error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
XFS_ATTR_FORK);
error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp);
if (error)
goto out;
ASSERT((((xfs_attr_leafblock_t *)bp->b_addr)->hdr.info.magic) ==
cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_bmap_init(args->flist, args->firstblock);
......@@ -1653,6 +1638,8 @@ xfs_attr_fillstate(xfs_da_state_t *state)
xfs_da_state_blk_t *blk;
int level;
trace_xfs_attr_fillstate(state->args);
/*
* Roll down the "path" in the state structure, storing the on-disk
* block number for those buffers in the "path".
......@@ -1699,6 +1686,8 @@ xfs_attr_refillstate(xfs_da_state_t *state)
xfs_da_state_blk_t *blk;
int level, error;
trace_xfs_attr_refillstate(state->args);
/*
* Roll down the "path" in the state structure, storing the on-disk
* block number for those buffers in the "path".
......@@ -1707,7 +1696,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
error = xfs_da_read_buf(state->args->trans,
error = xfs_da_node_read(state->args->trans,
state->args->dp,
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
......@@ -1726,7 +1715,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
error = xfs_da_read_buf(state->args->trans,
error = xfs_da_node_read(state->args->trans,
state->args->dp,
blk->blkno, blk->disk_blkno,
&blk->bp, XFS_ATTR_FORK);
......@@ -1755,6 +1744,8 @@ xfs_attr_node_get(xfs_da_args_t *args)
int error, retval;
int i;
trace_xfs_attr_node_get(args);
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
......@@ -1804,6 +1795,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
int error, i;
struct xfs_buf *bp;
trace_xfs_attr_node_list(context);
cursor = context->cursor;
cursor->initted = 1;
......@@ -1814,7 +1807,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
bp = NULL;
if (cursor->blkno > 0) {
error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
error = xfs_da_node_read(NULL, context->dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if ((error != 0) && (error != EFSCORRUPTED))
return(error);
......@@ -1856,17 +1849,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if (bp == NULL) {
cursor->blkno = 0;
for (;;) {
error = xfs_da_read_buf(NULL, context->dp,
error = xfs_da_node_read(NULL, context->dp,
cursor->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error)
return(error);
if (unlikely(bp == NULL)) {
XFS_ERROR_REPORT("xfs_attr_node_list(2)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount);
return(XFS_ERROR(EFSCORRUPTED));
}
node = bp->b_addr;
if (node->hdr.info.magic ==
cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
......@@ -1907,14 +1894,6 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
for (;;) {
leaf = bp->b_addr;
if (unlikely(leaf->hdr.info.magic !=
cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
xfs_trans_brelse(NULL, bp);
return(XFS_ERROR(EFSCORRUPTED));
}
error = xfs_attr_leaf_list_int(bp, context);
if (error) {
xfs_trans_brelse(NULL, bp);
......@@ -1924,16 +1903,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
break;
cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
xfs_trans_brelse(NULL, bp);
error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
error = xfs_attr_leaf_read(NULL, context->dp, cursor->blkno, -1,
&bp);
if (error)
return(error);
if (unlikely((bp == NULL))) {
XFS_ERROR_REPORT("xfs_attr_node_list(5)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount);
return(XFS_ERROR(EFSCORRUPTED));
}
return error;
}
xfs_trans_brelse(NULL, bp);
return(0);
......@@ -1959,6 +1932,8 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
int nmap, error, tmp, valuelen, blkcnt, i;
xfs_dablk_t lblkno;
trace_xfs_attr_rmtval_get(args);
ASSERT(!(args->flags & ATTR_KERNOVAL));
mp = args->dp->i_mount;
......@@ -1980,7 +1955,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
dblkno, blkcnt, 0, &bp);
dblkno, blkcnt, 0, &bp, NULL);
if (error)
return(error);
......@@ -2014,6 +1989,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
xfs_dablk_t lblkno;
int blkcnt, valuelen, nmap, error, tmp, committed;
trace_xfs_attr_rmtval_set(args);
dp = args->dp;
mp = dp->i_mount;
src = args->value;
......@@ -2143,6 +2120,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
xfs_dablk_t lblkno;
int valuelen, blkcnt, nmap, error, done, committed;
trace_xfs_attr_rmtval_remove(args);
mp = args->dp->i_mount;
/*
......
......@@ -57,7 +57,8 @@ STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
struct xfs_buf **bpp);
STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer,
xfs_da_args_t *args, int freemap_index);
STATIC void xfs_attr_leaf_compact(xfs_trans_t *tp, struct xfs_buf *leaf_buffer);
STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args,
struct xfs_buf *leaf_buffer);
STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state,
xfs_da_state_blk_t *blk1,
xfs_da_state_blk_t *blk2);
......@@ -87,6 +88,52 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf,
xfs_mount_t *mp);
STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
static void
xfs_attr_leaf_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_attr_leaf_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
if (!block_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_attr_leaf_read_verify(
struct xfs_buf *bp)
{
xfs_attr_leaf_verify(bp);
}
static void
xfs_attr_leaf_write_verify(
struct xfs_buf *bp)
{
xfs_attr_leaf_verify(bp);
}
const struct xfs_buf_ops xfs_attr_leaf_buf_ops = {
.verify_read = xfs_attr_leaf_read_verify,
.verify_write = xfs_attr_leaf_write_verify,
};
int
xfs_attr_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops);
}
/*========================================================================
* Namespace helper routines
*========================================================================*/
......@@ -869,17 +916,16 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
error = xfs_da_grow_inode(args, &blkno);
if (error)
goto out;
error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
XFS_ATTR_FORK);
error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1);
if (error)
goto out;
ASSERT(bp1 != NULL);
bp2 = NULL;
error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2,
XFS_ATTR_FORK);
if (error)
goto out;
ASSERT(bp2 != NULL);
bp2->b_ops = bp1->b_ops;
memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount));
bp1 = NULL;
xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
......@@ -933,7 +979,7 @@ xfs_attr_leaf_create(
XFS_ATTR_FORK);
if (error)
return(error);
ASSERT(bp != NULL);
bp->b_ops = &xfs_attr_leaf_buf_ops;
leaf = bp->b_addr;
memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
hdr = &leaf->hdr;
......@@ -1071,7 +1117,7 @@ xfs_attr_leaf_add(
* Compact the entries to coalesce free space.
* This may change the hdr->count via dropping INCOMPLETE entries.
*/
xfs_attr_leaf_compact(args->trans, bp);
xfs_attr_leaf_compact(args, bp);
/*
* After compaction, the block is guaranteed to have only one
......@@ -1102,6 +1148,8 @@ xfs_attr_leaf_add_work(
xfs_mount_t *mp;
int tmp, i;
trace_xfs_attr_leaf_add_work(args);
leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
......@@ -1214,15 +1262,17 @@ xfs_attr_leaf_add_work(
*/
STATIC void
xfs_attr_leaf_compact(
struct xfs_trans *trans,
struct xfs_da_args *args,
struct xfs_buf *bp)
{
xfs_attr_leafblock_t *leaf_s, *leaf_d;
xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
xfs_mount_t *mp;
struct xfs_trans *trans = args->trans;
struct xfs_mount *mp = trans->t_mountp;
char *tmpbuffer;
mp = trans->t_mountp;
trace_xfs_attr_leaf_compact(args);
tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
ASSERT(tmpbuffer != NULL);
memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
......@@ -1345,9 +1395,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
max = be16_to_cpu(hdr2->firstused)
- sizeof(xfs_attr_leaf_hdr_t);
max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t);
if (space > max) {
xfs_attr_leaf_compact(args->trans, blk2->bp);
}
if (space > max)
xfs_attr_leaf_compact(args, blk2->bp);
/*
* Move high entries from leaf1 to low end of leaf2.
......@@ -1378,9 +1427,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
max = be16_to_cpu(hdr1->firstused)
- sizeof(xfs_attr_leaf_hdr_t);
max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t);
if (space > max) {
xfs_attr_leaf_compact(args->trans, blk1->bp);
}
if (space > max)
xfs_attr_leaf_compact(args, blk1->bp);
/*
* Move low entries from leaf2 to high end of leaf1.
......@@ -1577,6 +1625,8 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
xfs_dablk_t blkno;
struct xfs_buf *bp;
trace_xfs_attr_leaf_toosmall(state->args);
/*
* Check for the degenerate case of the block being over 50% full.
* If so, it's not worth even looking to see if we might be able
......@@ -1636,18 +1686,16 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
blkno = be32_to_cpu(info->back);
if (blkno == 0)
continue;
error = xfs_da_read_buf(state->args->trans, state->args->dp,
blkno, -1, &bp, XFS_ATTR_FORK);
error = xfs_attr_leaf_read(state->args->trans, state->args->dp,
blkno, -1, &bp);
if (error)
return(error);
ASSERT(bp != NULL);
leaf = (xfs_attr_leafblock_t *)info;
count = be16_to_cpu(leaf->hdr.count);
bytes = state->blocksize - (state->blocksize>>2);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
count += be16_to_cpu(leaf->hdr.count);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
bytes -= count * sizeof(xfs_attr_leaf_entry_t);
......@@ -1702,6 +1750,8 @@ xfs_attr_leaf_remove(
int tablesize, tmp, i;
xfs_mount_t *mp;
trace_xfs_attr_leaf_remove(args);
leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
......@@ -2511,15 +2561,11 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
/*
* Set up the operation.
*/
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error) {
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return(error);
}
ASSERT(bp != NULL);
leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
......@@ -2576,15 +2622,11 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
/*
* Set up the operation.
*/
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
XFS_ATTR_FORK);
if (error) {
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
if (error)
return(error);
}
ASSERT(bp != NULL);
leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
......@@ -2633,35 +2675,28 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
/*
* Read the block containing the "old" attr
*/
error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp1,
XFS_ATTR_FORK);
if (error) {
return(error);
}
ASSERT(bp1 != NULL);
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
if (error)
return error;
/*
* Read the block containing the "new" attr, if it is different
*/
if (args->blkno2 != args->blkno) {
error = xfs_da_read_buf(args->trans, args->dp, args->blkno2,
-1, &bp2, XFS_ATTR_FORK);
if (error) {
return(error);
}
ASSERT(bp2 != NULL);
error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2,
-1, &bp2);
if (error)
return error;
} else {
bp2 = bp1;
}
leaf1 = bp1->b_addr;
ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
ASSERT(args->index >= 0);
entry1 = &leaf1->entries[ args->index ];
leaf2 = bp2->b_addr;
ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
ASSERT(args->index2 >= 0);
entry2 = &leaf2->entries[ args->index2 ];
......@@ -2746,7 +2781,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
* the extents in reverse order the extent containing
* block 0 must still be there.
*/
error = xfs_da_read_buf(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
error = xfs_da_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
if (error)
return(error);
blkno = XFS_BUF_ADDR(bp);
......@@ -2831,7 +2866,7 @@ xfs_attr_node_inactive(
* traversal of the tree so we may deal with many blocks
* before we come back to this one.
*/
error = xfs_da_read_buf(*trans, dp, child_fsb, -2, &child_bp,
error = xfs_da_node_read(*trans, dp, child_fsb, -2, &child_bp,
XFS_ATTR_FORK);
if (error)
return(error);
......@@ -2872,7 +2907,7 @@ xfs_attr_node_inactive(
* child block number.
*/
if ((i+1) < count) {
error = xfs_da_read_buf(*trans, dp, 0, parent_blkno,
error = xfs_da_node_read(*trans, dp, 0, parent_blkno,
&bp, XFS_ATTR_FORK);
if (error)
return(error);
......
......@@ -261,4 +261,10 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
int *local);
int xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp);
extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops;
#endif /* __XFS_ATTR_LEAF_H__ */
......@@ -2662,8 +2662,9 @@ xfs_bmap_btree_to_extents(
if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
return error;
#endif
if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
......@@ -3123,6 +3124,7 @@ xfs_bmap_extents_to_btree(
/*
* Fill in the child block.
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
ablock->bb_level = 0;
......@@ -3269,6 +3271,7 @@ xfs_bmap_local_to_extents(
ASSERT(args.len == 1);
*firstblock = args.fsbno;
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
bp->b_ops = &xfs_bmbt_buf_ops;
memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
......@@ -4078,8 +4081,9 @@ xfs_bmap_read_extents(
* pointer (leftmost) at each level.
*/
while (level-- > 0) {
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
return error;
block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
......@@ -4124,7 +4128,8 @@ xfs_bmap_read_extents(
*/
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
if (nextbno != NULLFSBLOCK)
xfs_btree_reada_bufl(mp, nextbno, 1);
xfs_btree_reada_bufl(mp, nextbno, 1,
&xfs_bmbt_buf_ops);
/*
* Copy records into the extent records.
*/
......@@ -4156,8 +4161,9 @@ xfs_bmap_read_extents(
*/
if (bno == NULLFSBLOCK)
break;
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
return error;
block = XFS_BUF_TO_BLOCK(bp);
}
......@@ -5599,7 +5605,7 @@ xfs_getbmap(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock_iolock;
}
......@@ -5868,15 +5874,16 @@ xfs_bmap_check_leaf_extents(
*/
while (level-- > 0) {
/* See if buf is in cur first */
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (bp) {
bp_release = 0;
} else {
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
}
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
goto error_norelse;
}
block = XFS_BUF_TO_BLOCK(bp);
XFS_WANT_CORRUPTED_GOTO(
xfs_bmap_sanity_check(mp, bp, level),
......@@ -5953,15 +5960,16 @@ xfs_bmap_check_leaf_extents(
if (bno == NULLFSBLOCK)
break;
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (bp) {
bp_release = 0;
} else {
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
}
if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
goto error_norelse;
}
block = XFS_BUF_TO_BLOCK(bp);
}
if (bp_release) {
......@@ -6052,7 +6060,9 @@ xfs_bmap_count_tree(
struct xfs_btree_block *block, *nextblock;
int numrecs;
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
*count += 1;
block = XFS_BUF_TO_BLOCK(bp);
......@@ -6061,8 +6071,10 @@ xfs_bmap_count_tree(
/* Not at node above leaves, count this level of nodes */
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
while (nextbno != NULLFSBLOCK) {
if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
0, &nbp, XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
*count += 1;
nextblock = XFS_BUF_TO_BLOCK(nbp);
......@@ -6091,8 +6103,10 @@ xfs_bmap_count_tree(
if (nextbno == NULLFSBLOCK)
break;
bno = nextbno;
if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF)))
error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
*count += 1;
block = XFS_BUF_TO_BLOCK(bp);
......
......@@ -36,6 +36,7 @@
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
/*
* Determine the extent state.
......@@ -707,6 +708,67 @@ xfs_bmbt_key_diff(
cur->bc_rec.b.br_startoff;
}
static void
xfs_bmbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
unsigned int level;
int lblock_ok; /* block passes checks */
/* magic number and level verification.
*
 * We don't know what fork we belong to, so just verify that the level
* is less than the maximum of the two. Later checks will be more
* precise.
*/
level = be16_to_cpu(block->bb_level);
lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) &&
level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]);
/* numrecs verification */
lblock_ok = lblock_ok &&
be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
/* sibling pointer verification */
lblock_ok = lblock_ok &&
block->bb_u.l.bb_leftsib &&
(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_rightsib)));
if (!lblock_ok) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
xfs_bmbt_verify(bp);
}
static void
xfs_bmbt_write_verify(
struct xfs_buf *bp)
{
xfs_bmbt_verify(bp);
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
#ifdef DEBUG
STATIC int
xfs_bmbt_keys_inorder(
......@@ -746,6 +808,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
.buf_ops = &xfs_bmbt_buf_ops,
#ifdef DEBUG
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
......
......@@ -236,5 +236,6 @@ extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
#endif /* __XFS_BMAP_BTREE_H__ */
......@@ -266,9 +266,13 @@ xfs_btree_dup_cursor(
for (i = 0; i < new->bc_nlevels; i++) {
new->bc_ptrs[i] = cur->bc_ptrs[i];
new->bc_ra[i] = cur->bc_ra[i];
if ((bp = cur->bc_bufs[i])) {
if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
bp = cur->bc_bufs[i];
if (bp) {
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_BUF_ADDR(bp), mp->m_bsize,
0, &bp,
cur->bc_ops->buf_ops);
if (error) {
xfs_btree_del_cursor(new, error);
*ncur = NULL;
return error;
......@@ -609,25 +613,26 @@ xfs_btree_offsets(
* Get a buffer for the block, return it read in.
* Long-form addressing.
*/
int /* error */
int
xfs_btree_read_bufl(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
uint lock, /* lock flags for read_buf */
xfs_buf_t **bpp, /* buffer for fsbno */
int refval) /* ref count value for buffer */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops)
{
xfs_buf_t *bp; /* return value */
struct xfs_buf *bp; /* return value */
xfs_daddr_t d; /* real disk block address */
int error;
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
mp->m_bsize, lock, &bp))) {
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
mp->m_bsize, lock, &bp, ops);
if (error)
return error;
}
ASSERT(!xfs_buf_geterror(bp));
if (bp)
xfs_buf_set_ref(bp, refval);
......@@ -642,15 +647,16 @@ xfs_btree_read_bufl(
/* ARGSUSED */
void
xfs_btree_reada_bufl(
xfs_mount_t *mp, /* file system mount point */
struct xfs_mount *mp, /* file system mount point */
xfs_fsblock_t fsbno, /* file system block number */
xfs_extlen_t count) /* count of filesystem blocks */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops)
{
xfs_daddr_t d;
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}
/*
......@@ -660,17 +666,18 @@ xfs_btree_reada_bufl(
/* ARGSUSED */
void
xfs_btree_reada_bufs(
xfs_mount_t *mp, /* file system mount point */
struct xfs_mount *mp, /* file system mount point */
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t agbno, /* allocation group block number */
xfs_extlen_t count) /* count of filesystem blocks */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops)
{
xfs_daddr_t d;
ASSERT(agno != NULLAGNUMBER);
ASSERT(agbno != NULLAGBLOCK);
d = XFS_AGB_TO_DADDR(mp, agno, agbno);
xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}
STATIC int
......@@ -684,12 +691,14 @@ xfs_btree_readahead_lblock(
xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
xfs_btree_reada_bufl(cur->bc_mp, left, 1);
xfs_btree_reada_bufl(cur->bc_mp, left, 1,
cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
xfs_btree_reada_bufl(cur->bc_mp, right, 1);
xfs_btree_reada_bufl(cur->bc_mp, right, 1,
cur->bc_ops->buf_ops);
rval++;
}
......@@ -709,13 +718,13 @@ xfs_btree_readahead_sblock(
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
left, 1);
left, 1, cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
right, 1);
right, 1, cur->bc_ops->buf_ops);
rval++;
}
......@@ -853,18 +862,22 @@ xfs_btree_set_sibling(
}
}
STATIC void
void
xfs_btree_init_block(
struct xfs_btree_cur *cur,
int level,
int numrecs,
struct xfs_btree_block *new) /* new block */
struct xfs_mount *mp,
struct xfs_buf *bp,
__u32 magic,
__u16 level,
__u16 numrecs,
unsigned int flags)
{
new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp);
new->bb_magic = cpu_to_be32(magic);
new->bb_level = cpu_to_be16(level);
new->bb_numrecs = cpu_to_be16(numrecs);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
if (flags & XFS_BTREE_LONG_PTRS) {
new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
} else {
......@@ -873,6 +886,17 @@ xfs_btree_init_block(
}
}
STATIC void
xfs_btree_init_block_cur(
struct xfs_btree_cur *cur,
int level,
int numrecs,
struct xfs_buf *bp)
{
xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
level, numrecs, cur->bc_flags);
}
/*
* Return true if ptr is the last record in the btree and
 * we need to track updates to this record. The decision
......@@ -972,6 +996,7 @@ xfs_btree_get_buf_block(
if (!*bpp)
return ENOMEM;
(*bpp)->b_ops = cur->bc_ops->buf_ops;
*block = XFS_BUF_TO_BLOCK(*bpp);
return 0;
}
......@@ -998,19 +1023,15 @@ xfs_btree_read_buf_block(
d = xfs_btree_ptr_to_daddr(cur, ptr);
error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
mp->m_bsize, flags, bpp);
mp->m_bsize, flags, bpp,
cur->bc_ops->buf_ops);
if (error)
return error;
ASSERT(!xfs_buf_geterror(*bpp));
xfs_btree_set_refs(cur, *bpp);
*block = XFS_BUF_TO_BLOCK(*bpp);
error = xfs_btree_check_block(cur, *block, level, *bpp);
if (error)
xfs_trans_brelse(cur->bc_tp, *bpp);
return error;
return 0;
}
/*
......@@ -2183,7 +2204,7 @@ xfs_btree_split(
goto error0;
/* Fill in the btree header for the new right block. */
xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
/*
* Split the entries between the old and the new block evenly.
......@@ -2492,7 +2513,7 @@ xfs_btree_new_root(
nptr = 2;
}
/* Fill in the new block's btree header and log it. */
xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
!xfs_btree_ptr_is_null(cur, &rptr));
......
......@@ -188,6 +188,8 @@ struct xfs_btree_ops {
__int64_t (*key_diff)(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
const struct xfs_buf_ops *buf_ops;
#ifdef DEBUG
/* check that k1 is lower than k2 */
int (*keys_inorder)(struct xfs_btree_cur *cur,
......@@ -355,7 +357,8 @@ xfs_btree_read_bufl(
xfs_fsblock_t fsbno, /* file system block number */
uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval);/* ref count value for buffer */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops);
/*
* Read-ahead the block, don't wait for it, don't return a buffer.
......@@ -365,7 +368,8 @@ void /* error */
xfs_btree_reada_bufl(
struct xfs_mount *mp, /* file system mount point */
xfs_fsblock_t fsbno, /* file system block number */
xfs_extlen_t count); /* count of filesystem blocks */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops);
/*
* Read-ahead the block, don't wait for it, don't return a buffer.
......@@ -376,8 +380,20 @@ xfs_btree_reada_bufs(
struct xfs_mount *mp, /* file system mount point */
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t agbno, /* allocation group block number */
xfs_extlen_t count); /* count of filesystem blocks */
xfs_extlen_t count, /* count of filesystem blocks */
const struct xfs_buf_ops *ops);
/*
* Initialise a new btree block header
*/
void
xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_buf *bp,
__u32 magic,
__u16 level,
__u16 numrecs,
unsigned int flags);
/*
* Common btree core entry points.
......
......@@ -569,7 +569,9 @@ _xfs_buf_find(
*/
if (bp->b_flags & XBF_STALE) {
ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
ASSERT(bp->b_iodone == NULL);
bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
bp->b_ops = NULL;
}
trace_xfs_buf_find(bp, flags, _RET_IP_);
......@@ -654,7 +656,8 @@ xfs_buf_read_map(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps,
xfs_buf_flags_t flags)
xfs_buf_flags_t flags,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
......@@ -666,6 +669,7 @@ xfs_buf_read_map(
if (!XFS_BUF_ISDONE(bp)) {
XFS_STATS_INC(xb_get_read);
bp->b_ops = ops;
_xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) {
/*
......@@ -691,13 +695,14 @@ void
xfs_buf_readahead_map(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps)
int nmaps,
const struct xfs_buf_ops *ops)
{
if (bdi_read_congested(target->bt_bdi))
return;
xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
}
/*
......@@ -709,10 +714,10 @@ xfs_buf_read_uncached(
struct xfs_buftarg *target,
xfs_daddr_t daddr,
size_t numblks,
int flags)
int flags,
const struct xfs_buf_ops *ops)
{
xfs_buf_t *bp;
int error;
struct xfs_buf *bp;
bp = xfs_buf_get_uncached(target, numblks, flags);
if (!bp)
......@@ -723,13 +728,10 @@ xfs_buf_read_uncached(
bp->b_bn = daddr;
bp->b_maps[0].bm_bn = daddr;
bp->b_flags |= XBF_READ;
bp->b_ops = ops;
xfsbdstrat(target->bt_mount, bp);
error = xfs_buf_iowait(bp);
if (error) {
xfs_buf_relse(bp);
return NULL;
}
xfs_buf_iowait(bp);
return bp;
}
......@@ -999,27 +1001,37 @@ STATIC void
xfs_buf_iodone_work(
struct work_struct *work)
{
xfs_buf_t *bp =
struct xfs_buf *bp =
container_of(work, xfs_buf_t, b_iodone_work);
bool read = !!(bp->b_flags & XBF_READ);
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
if (read && bp->b_ops)
bp->b_ops->verify_read(bp);
if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
else {
ASSERT(read && bp->b_ops);
complete(&bp->b_iowait);
}
}
void
xfs_buf_ioend(
xfs_buf_t *bp,
struct xfs_buf *bp,
int schedule)
{
bool read = !!(bp->b_flags & XBF_READ);
trace_xfs_buf_iodone(bp, _RET_IP_);
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
if (bp->b_error == 0)
bp->b_flags |= XBF_DONE;
if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
if (schedule) {
INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
......@@ -1027,6 +1039,7 @@ xfs_buf_ioend(
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
complete(&bp->b_iowait);
}
}
......@@ -1314,6 +1327,20 @@ _xfs_buf_ioapply(
rw |= REQ_FUA;
if (bp->b_flags & XBF_FLUSH)
rw |= REQ_FLUSH;
/*
* Run the write verifier callback function if it exists. If
* this function fails it will mark the buffer with an error and
* the IO should not be dispatched.
*/
if (bp->b_ops) {
bp->b_ops->verify_write(bp);
if (bp->b_error) {
xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_CORRUPT_INCORE);
return;
}
}
} else if (bp->b_flags & XBF_READ_AHEAD) {
rw = READA;
} else {
......
......@@ -100,6 +100,7 @@ typedef struct xfs_buftarg {
struct xfs_buf;
typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
#define XB_PAGES 2
struct xfs_buf_map {
......@@ -110,6 +111,11 @@ struct xfs_buf_map {
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
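
These two hooks are the core of the verifier API added by this series:
read verification runs at I/O completion and write verification runs at
submission, as the xfs_buf.c changes above show. A standalone userspace
analogue of the callback pattern, for illustration only (the struct
names, magic value, and error code are invented, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel structures. */
struct mock_buf;
struct mock_buf_ops {
	void (*verify_read)(struct mock_buf *);
	void (*verify_write)(struct mock_buf *);
};
struct mock_buf {
	uint32_t magic;		/* pretend on-disk header field */
	int error;		/* stands in for bp->b_error */
	const struct mock_buf_ops *ops;
};

#define MOCK_MAGIC 0x58465342	/* arbitrary "XFSB"-style magic */

static void mock_verify(struct mock_buf *bp)
{
	if (bp->magic != MOCK_MAGIC)
		bp->error = 117;	/* EFSCORRUPTED-style error code */
}

static const struct mock_buf_ops mock_buf_ops = {
	.verify_read  = mock_verify,
	.verify_write = mock_verify,
};

int main(void)
{
	struct mock_buf bp = { .magic = 0, .ops = &mock_buf_ops };

	/* Read completion runs verify_read, as xfs_buf_iodone_work does;
	 * write submission runs verify_write, as _xfs_buf_ioapply does. */
	bp.ops->verify_read(&bp);
	printf("read verify error: %d\n", bp.error);
	return 0;
}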
typedef struct xfs_buf {
/*
* first cacheline holds all the fields needed for an uncontended cache
......@@ -153,13 +159,13 @@ typedef struct xfs_buf {
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
unsigned short b_error; /* error code on I/O */
const struct xfs_buf_ops *b_ops;
#ifdef XFS_BUF_LOCK_TRACKING
int b_last_holder;
#endif
} xfs_buf_t;
/* Finding and Reading Buffers */
struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps,
......@@ -196,9 +202,11 @@ struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
xfs_buf_flags_t flags);
struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps,
xfs_buf_flags_t flags);
xfs_buf_flags_t flags,
const struct xfs_buf_ops *ops);
void xfs_buf_readahead_map(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps);
struct xfs_buf_map *map, int nmaps,
const struct xfs_buf_ops *ops);
static inline struct xfs_buf *
xfs_buf_get(
......@@ -216,20 +224,22 @@ xfs_buf_read(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
size_t numblks,
xfs_buf_flags_t flags)
xfs_buf_flags_t flags,
const struct xfs_buf_ops *ops)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
return xfs_buf_read_map(target, &map, 1, flags);
return xfs_buf_read_map(target, &map, 1, flags, ops);
}
static inline void
xfs_buf_readahead(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
size_t numblks)
size_t numblks,
const struct xfs_buf_ops *ops)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
return xfs_buf_readahead_map(target, &map, 1);
return xfs_buf_readahead_map(target, &map, 1, ops);
}
struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
......@@ -239,7 +249,8 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
int flags);
struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target,
xfs_daddr_t daddr, size_t numblks, int flags);
xfs_daddr_t daddr, size_t numblks, int flags,
const struct xfs_buf_ops *ops);
void xfs_buf_hold(struct xfs_buf *bp);
/* Releasing Buffers */
......
#ifndef _XFS_CKSUM_H
#define _XFS_CKSUM_H 1
#define XFS_CRC_SEED (~(__uint32_t)0)
/*
* Calculate the intermediate checksum for a buffer that has the CRC field
* inside it. The offset of the 32bit crc fields is passed as the
* cksum_offset parameter.
*/
static inline __uint32_t
xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t zero = 0;
__uint32_t crc;
/* Calculate CRC up to the checksum. */
crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
/* Skip checksum field */
crc = crc32c(crc, &zero, sizeof(__u32));
/* Calculate the rest of the CRC. */
return crc32c(crc, &buffer[cksum_offset + sizeof(__be32)],
length - (cksum_offset + sizeof(__be32)));
}
/*
* Convert the intermediate checksum to the final ondisk format.
*
* The CRC32c calculation uses LE format even on BE machines, but returns the
* result in host endian format. Hence we need to byte swap it back to LE format
* so that it is consistent on disk.
*/
static inline __le32
xfs_end_cksum(__uint32_t crc)
{
return ~cpu_to_le32(crc);
}
/*
* Helper to generate the checksum for a buffer.
*/
static inline void
xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
}
/*
* Helper to verify the checksum for a buffer.
*/
static inline int
xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
{
__uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset);
return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
}
#endif /* _XFS_CKSUM_H */
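As a worked example of how the three helpers compose, a hypothetical 512-byte on-disk header with an embedded CRC could be stamped and checked as below; the structure and function names are invented for illustration and are not part of this series:

struct example_hdr {
	__be32	magic;
	__le32	crc;		/* CRCed as zero by xfs_start_cksum() */
	__be64	lsn;
	char	payload[496];
};

static void example_update_crc(struct example_hdr *hdr)
{
	/* checksum the whole object, substituting zero for the crc field */
	xfs_update_cksum((char *)hdr, sizeof(*hdr),
			 offsetof(struct example_hdr, crc));
}

static int example_verify_crc(struct example_hdr *hdr)
{
	/* non-zero when the stored little-endian CRC matches a recomputation */
	return xfs_verify_cksum((char *)hdr, sizeof(*hdr),
				offsetof(struct example_hdr, crc));
}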
......@@ -91,6 +91,84 @@ STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
xfs_da_state_blk_t *save_blk);
STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state);
static void
xfs_da_node_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_da_node_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC);
block_ok = block_ok &&
be16_to_cpu(hdr->level) > 0 &&
be16_to_cpu(hdr->count) > 0;
if (!block_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_da_node_write_verify(
struct xfs_buf *bp)
{
xfs_da_node_verify(bp);
}
/*
* leaf/node format detection on trees is sketchy, so a node read can be done on
* leaf level blocks when detection identifies the tree as a node format tree
* incorrectly. In this case, we need to swap the verifier to match the correct
* format of the block being read.
*/
static void
xfs_da_node_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_da_blkinfo *info = bp->b_addr;
switch (be16_to_cpu(info->magic)) {
case XFS_DA_NODE_MAGIC:
xfs_da_node_verify(bp);
break;
case XFS_ATTR_LEAF_MAGIC:
bp->b_ops = &xfs_attr_leaf_buf_ops;
bp->b_ops->verify_read(bp);
return;
case XFS_DIR2_LEAFN_MAGIC:
bp->b_ops = &xfs_dir2_leafn_buf_ops;
bp->b_ops->verify_read(bp);
return;
default:
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
mp, info);
xfs_buf_ioerror(bp, EFSCORRUPTED);
break;
}
}
const struct xfs_buf_ops xfs_da_node_buf_ops = {
.verify_read = xfs_da_node_read_verify,
.verify_write = xfs_da_node_write_verify,
};
int
xfs_da_node_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp,
int which_fork)
{
return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
which_fork, &xfs_da_node_buf_ops);
}
/*========================================================================
* Routines used for growing the Btree.
*========================================================================*/
......@@ -125,6 +203,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
xfs_trans_log_buf(tp, bp,
XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
bp->b_ops = &xfs_da_node_buf_ops;
*bpp = bp;
return(0);
}
......@@ -324,6 +403,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
}
memcpy(node, oldroot, size);
xfs_trans_log_buf(tp, bp, 0, size - 1);
bp->b_ops = blk1->bp->b_ops;
blk1->bp = bp;
blk1->blkno = blkno;
......@@ -746,7 +827,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
*/
child = be32_to_cpu(oldroot->btree[0].before);
ASSERT(child != 0);
error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp,
args->whichfork);
if (error)
return(error);
......@@ -754,7 +835,14 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
xfs_da_blkinfo_onlychild_validate(bp->b_addr,
be16_to_cpu(oldroot->hdr.level));
/*
* This could be copying a leaf back into the root block in the case of
* there only being a single leaf block left in the tree. Hence we have
* to update the b_ops pointer as well to match the buffer type change
* that could occur.
*/
memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
root_blk->bp->b_ops = bp->b_ops;
xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
return(error);
......@@ -779,6 +867,8 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
xfs_dablk_t blkno;
struct xfs_buf *bp;
trace_xfs_da_node_toosmall(state->args);
/*
* Check for the degenerate case of the block being over 50% full.
* If so, it's not worth even looking to see if we might be able
......@@ -835,7 +925,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
blkno = be32_to_cpu(info->back);
if (blkno == 0)
continue;
error = xfs_da_node_read(state->args->trans, state->args->dp,
blkno, -1, &bp, state->args->whichfork);
if (error)
return(error);
......@@ -900,6 +990,8 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
xfs_dahash_t lasthash=0;
int level, count;
trace_xfs_da_fixhashpath(state->args);
level = path->active-1;
blk = &path->blk[ level ];
switch (blk->magic) {
......@@ -1079,7 +1171,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
* Read the next node down in the tree.
*/
blk->blkno = blkno;
error = xfs_da_node_read(args->trans, args->dp, blkno,
-1, &blk->bp, args->whichfork);
if (error) {
blk->blkno = 0;
......@@ -1241,7 +1333,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
new_info->forw = cpu_to_be32(old_blk->blkno);
new_info->back = old_info->back;
if (old_info->back) {
error = xfs_da_node_read(args->trans, args->dp,
be32_to_cpu(old_info->back),
-1, &bp, args->whichfork);
if (error)
......@@ -1262,7 +1354,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
new_info->forw = old_info->forw;
new_info->back = cpu_to_be32(old_blk->blkno);
if (old_info->forw) {
error = xfs_da_node_read(args->trans, args->dp,
be32_to_cpu(old_info->forw),
-1, &bp, args->whichfork);
if (error)
......@@ -1362,7 +1454,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
trace_xfs_da_unlink_back(args);
save_info->back = drop_info->back;
if (drop_info->back) {
error = xfs_da_node_read(args->trans, args->dp,
be32_to_cpu(drop_info->back),
-1, &bp, args->whichfork);
if (error)
......@@ -1379,7 +1471,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
trace_xfs_da_unlink_forward(args);
save_info->forw = drop_info->forw;
if (drop_info->forw) {
error = xfs_da_node_read(args->trans, args->dp,
be32_to_cpu(drop_info->forw),
-1, &bp, args->whichfork);
if (error)
......@@ -1417,6 +1509,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
xfs_dablk_t blkno=0;
int level, error;
trace_xfs_da_path_shift(state->args);
/*
* Roll up the Btree looking for the first block where our
* current index is not at the edge of the block. Note that
......@@ -1463,7 +1557,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
* Read the next child block.
*/
blk->blkno = blkno;
error = xfs_da_node_read(args->trans, args->dp, blkno, -1,
&blk->bp, args->whichfork);
if (error)
return(error);
......@@ -1727,7 +1821,8 @@ xfs_da_swap_lastblock(
* Read the last block in the btree space.
*/
last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w);
if (error)
return error;
/*
* Copy the last block into the dead buffer and log it.
......@@ -1753,7 +1848,8 @@ xfs_da_swap_lastblock(
* If the moved block has a left sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->back))) {
error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
if (unlikely(
......@@ -1774,7 +1870,8 @@ xfs_da_swap_lastblock(
* If the moved block has a right sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
if (unlikely(
......@@ -1797,7 +1894,8 @@ xfs_da_swap_lastblock(
* Walk down the tree looking for the parent of the moved block.
*/
for (;;) {
error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
if (unlikely(par_node->hdr.info.magic !=
......@@ -1847,7 +1945,8 @@ xfs_da_swap_lastblock(
error = XFS_ERROR(EFSCORRUPTED);
goto done;
}
error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
if (unlikely(
......@@ -2133,7 +2232,8 @@ xfs_da_read_buf(
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp,
int whichfork,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
struct xfs_buf_map map;
......@@ -2155,7 +2255,7 @@ xfs_da_read_buf(
error = xfs_trans_read_buf_map(dp->i_mount, trans,
dp->i_mount->m_ddev_targp,
mapp, nmap, 0, &bp, ops);
if (error)
goto out_free;
......@@ -2211,9 +2311,10 @@ xfs_da_reada_buf(
struct xfs_trans *trans,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mappedbno,
int whichfork,
const struct xfs_buf_ops *ops)
{
struct xfs_buf_map map;
struct xfs_buf_map *mapp;
int nmap;
......@@ -2221,7 +2322,7 @@ xfs_da_reada_buf(
mapp = &map;
nmap = 1;
error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
&mapp, &nmap);
if (error) {
/* mapping a hole is not an error, but we don't continue */
......@@ -2231,7 +2332,7 @@ xfs_da_reada_buf(
}
mappedbno = mapp[0].bm_bn;
xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
out_free:
if (mapp != &map)
......
......@@ -18,7 +18,6 @@
#ifndef __XFS_DA_BTREE_H__
#define __XFS_DA_BTREE_H__
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_inode;
struct xfs_mount;
......@@ -214,6 +213,9 @@ int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
*/
int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
xfs_da_state_blk_t *new_blk);
int xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int which_fork);
/*
* Utility routines.
......@@ -226,9 +228,11 @@ int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
struct xfs_buf **bp, int whichfork);
int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno,
int whichfork, const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
......
......@@ -246,12 +246,10 @@ xfs_swap_extents(
goto out_unlock;
}
error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
goto out_unlock;
truncate_pagecache_range(VFS_I(ip), 0, -1);
/* Verify O_DIRECT for ftmp */
if (VN_CACHED(VFS_I(tip)) != 0) {
......@@ -315,8 +313,7 @@ xfs_swap_extents(
* are safe. We don't really care if non-io related
* fields change.
*/
truncate_pagecache_range(VFS_I(ip), 0, -1);
tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
if ((error = xfs_trans_reserve(tp, 0,
......
......@@ -56,144 +56,96 @@ xfs_dir_startup(void)
xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
}
static void
xfs_dir2_block_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
if (!block_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_dir2_block_read_verify(
struct xfs_buf *bp)
{
xfs_dir2_block_verify(bp);
}
static void
xfs_dir2_block_write_verify(
struct xfs_buf *bp)
{
xfs_dir2_block_verify(bp);
}
const struct xfs_buf_ops xfs_dir2_block_buf_ops = {
.verify_read = xfs_dir2_block_read_verify,
.verify_write = xfs_dir2_block_write_verify,
};
static int
xfs_dir2_block_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
struct xfs_buf **bpp)
{
struct xfs_mount *mp = dp->i_mount;
return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp,
XFS_DATA_FORK, &xfs_dir2_block_buf_ops);
}
static void
xfs_dir2_block_need_space(
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_block_tail *btp,
struct xfs_dir2_leaf_entry *blp,
__be16 **tagpp,
struct xfs_dir2_data_unused **dupp,
struct xfs_dir2_data_unused **enddupp,
int *compact,
int len)
{
struct xfs_dir2_data_free *bf;
__be16 *tagp = NULL;
struct xfs_dir2_data_unused *dup = NULL;
struct xfs_dir2_data_unused *enddup = NULL;
*compact = 0;
bf = hdr->bestfree;
/*
* If there are stale entries we'll use one for the leaf.
*/
if (btp->stale) {
if (be16_to_cpu(bf[0].length) >= len) {
/*
* The biggest entry enough to avoid compaction.
*/
dup = (xfs_dir2_data_unused_t *)
((char *)hdr + be16_to_cpu(bf[0].offset));
goto out;
}
/*
* Will need to compact to make this work.
* Tag just before the first leaf entry.
*/
*compact = 1;
tagp = (__be16 *)blp - 1;
/* Data object just before the first leaf entry. */
dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then the data will go where the
* leaf data starts now, if it works at all.
......@@ -206,58 +158,88 @@ xfs_dir2_block_addname(
dup = NULL;
else
dup = (xfs_dir2_data_unused_t *)blp;
goto out;
}
/*
* no stale entries, so just use free space.
* Tag just before the first leaf entry.
*/
tagp = (__be16 *)blp - 1;
/* Data object just before the first leaf entry. */
enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then can't do this add without cleaning up:
* the space before the first leaf entry needs to be free so it
* can be expanded to hold the pointer to the new entry.
*/
if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
/*
* Check out the biggest freespace and see if it's the same one.
*/
dup = (xfs_dir2_data_unused_t *)
((char *)hdr + be16_to_cpu(bf[0].offset));
if (dup != enddup) {
/*
* Not the same free entry, just check its length.
*/
if (be16_to_cpu(dup->length) < len)
dup = NULL;
goto out;
}
/*
* It is the biggest freespace, can it hold the leaf too?
*/
if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
/*
* Yes, use the second-largest entry instead if it works.
*/
if (be16_to_cpu(bf[1].length) >= len)
dup = (xfs_dir2_data_unused_t *)
((char *)hdr + be16_to_cpu(bf[1].offset));
else
dup = NULL;
}
}
out:
*tagpp = tagp;
*dupp = dup;
*enddupp = enddup;
}
/*
* compact the leaf entries.
* Leave the highest-numbered stale entry stale.
* XXX should be the one closest to mid but mid is not yet computed.
*/
static void
xfs_dir2_block_compact(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_block_tail *btp,
struct xfs_dir2_leaf_entry *blp,
int *needlog,
int *lfloghigh,
int *lfloglow)
{
int fromidx; /* source leaf index */
int toidx; /* target leaf index */
int needscan = 0;
int highstale; /* high stale index */
fromidx = toidx = be32_to_cpu(btp->count) - 1;
highstale = *lfloghigh = -1;
for (; fromidx >= 0; fromidx--) {
if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
if (highstale == -1)
highstale = toidx;
else {
if (*lfloghigh == -1)
*lfloghigh = toidx;
continue;
}
}
......@@ -265,32 +247,126 @@ xfs_dir2_block_addname(
blp[toidx] = blp[fromidx];
toidx--;
}
*lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
*lfloghigh -= be32_to_cpu(btp->stale) - 1;
be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
xfs_dir2_data_make_free(tp, bp,
(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
needlog, &needscan);
blp += be32_to_cpu(btp->stale) - 1;
btp->stale = cpu_to_be32(1);
/*
* If we now need to rebuild the bestfree map, do so.
* This needs to happen before the next call to use_free.
*/
if (needscan)
xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog);
}
/*
* Add an entry to a block directory.
*/
int /* error */
xfs_dir2_block_addname(
xfs_da_args_t *args) /* directory op arguments */
{
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
struct xfs_buf *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
int compact; /* need to compact leaf ents */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_inode_t *dp; /* directory inode */
xfs_dir2_data_unused_t *dup; /* block unused entry */
int error; /* error return value */
xfs_dir2_data_unused_t *enddup=NULL; /* unused at end of data */
xfs_dahash_t hash; /* hash value of found entry */
int high; /* high index for binary srch */
int highstale; /* high stale index */
int lfloghigh=0; /* last final leaf to log */
int lfloglow=0; /* first final leaf to log */
int len; /* length of the new entry */
int low; /* low index for binary srch */
int lowstale; /* low stale index */
int mid=0; /* midpoint for binary srch */
xfs_mount_t *mp; /* filesystem mount point */
int needlog; /* need to log header */
int needscan; /* need to rescan freespace */
__be16 *tagp; /* pointer to tag value */
xfs_trans_t *tp; /* transaction structure */
trace_xfs_dir2_block_addname(args);
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
/* Read the (one and only) directory block into bp. */
error = xfs_dir2_block_read(tp, dp, &bp);
if (error)
return error;
len = xfs_dir2_data_entsize(args->namelen);
/*
* Set up pointers to parts of the block.
*/
hdr = bp->b_addr;
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Find out if we can reuse stale entries or whether we need extra
* space for entry and new leaf.
*/
xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup,
&enddup, &compact, len);
/*
* Done everything we need for a space check now.
*/
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
xfs_trans_brelse(tp, bp);
if (!dup)
return XFS_ERROR(ENOSPC);
return 0;
}
/*
* If we don't have space for the new entry & leaf ...
*/
if (!dup) {
/* Don't have a space reservation: return no-space. */
if (args->total == 0)
return XFS_ERROR(ENOSPC);
/*
* Convert to the next larger format.
* Then add the new entry in that format.
*/
error = xfs_dir2_block_to_leaf(args, bp);
if (error)
return error;
return xfs_dir2_leaf_addname(args);
}
needlog = needscan = 0;
/*
* If need to compact the leaf entries, do it now.
*/
if (compact)
xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
&lfloghigh, &lfloglow);
else if (btp->stale) {
/*
* Set leaf logging boundaries to impossible state.
* For the no-stale case they're set explicitly.
*/
lfloglow = be32_to_cpu(btp->count);
lfloghigh = -1;
}
/*
* Find the slot that's first lower than our hash value, -1 if none.
*/
......@@ -450,18 +526,13 @@ xfs_dir2_block_getdents(
/*
* If the block number in the offset is out of range, we're done.
*/
if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk)
return 0;
error = xfs_dir2_block_read(NULL, dp, &bp);
if (error)
return error;
/*
* Extract the byte offset we start at from the seek pointer.
* We'll skip entries before this.
......@@ -637,14 +708,11 @@ xfs_dir2_block_lookup_int(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
error = xfs_dir2_block_read(tp, dp, &bp);
if (error)
return error;
hdr = bp->b_addr;
xfs_dir2_data_check(dp, bp);
btp = xfs_dir2_block_tail_p(mp, hdr);
......@@ -917,9 +985,9 @@ xfs_dir2_leaf_to_block(
/*
* Read the data block if we don't already have it, give up if it fails.
*/
if (!dbp) {
error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp);
if (error)
return error;
}
hdr = dbp->b_addr;
......@@ -944,6 +1012,7 @@ xfs_dir2_leaf_to_block(
/*
* Start converting it to block form.
*/
dbp->b_ops = &xfs_dir2_block_buf_ops;
hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
needlog = 1;
needscan = 0;
......@@ -1073,6 +1142,7 @@ xfs_dir2_sf_to_block(
kmem_free(sfp);
return error;
}
bp->b_ops = &xfs_dir2_block_buf_ops;
hdr = bp->b_addr;
hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
/*
......
......@@ -34,14 +34,13 @@
STATIC xfs_dir2_data_free_t *
xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
/*
* Check the consistency of the data block.
* The input can also be a block-format directory.
* Return 0 if the buffer is good, otherwise an error.
*/
int
__xfs_dir2_data_check(
struct xfs_inode *dp, /* incore inode pointer */
struct xfs_buf *bp) /* data block's buffer */
{
......@@ -64,18 +63,23 @@ xfs_dir2_data_check(
int stale; /* count of stale leaves */
struct xfs_name name;
mp = bp->b_target->bt_mount;
hdr = bp->b_addr;
bf = hdr->bestfree;
p = (char *)(hdr + 1);
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
btp = xfs_dir2_block_tail_p(mp, hdr);
lep = xfs_dir2_block_leaf_p(btp);
endp = (char *)lep;
break;
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
endp = (char *)hdr + mp->m_dirblksize;
break;
default:
XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
return EFSCORRUPTED;
}
count = lastfree = freeseen = 0;
......@@ -83,19 +87,22 @@ xfs_dir2_data_check(
* Account for zero bestfree entries.
*/
if (!bf[0].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
freeseen |= 1 << 0;
}
if (!bf[1].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
freeseen |= 1 << 1;
}
if (!bf[2].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
freeseen |= 1 << 2;
}
XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
be16_to_cpu(bf[1].length));
XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
be16_to_cpu(bf[2].length));
/*
* Loop over the data/unused entries.
*/
......@@ -107,16 +114,19 @@ xfs_dir2_data_check(
* doesn't need to be there.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
XFS_WANT_CORRUPTED_RETURN(
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
(char *)dup - (char *)hdr);
dfp = xfs_dir2_data_freefind(hdr, dup);
if (dfp) {
i = (int)(dfp - bf);
XFS_WANT_CORRUPTED_RETURN(
(freeseen & (1 << i)) == 0);
freeseen |= 1 << i;
} else {
XFS_WANT_CORRUPTED_RETURN(
be16_to_cpu(dup->length) <=
be16_to_cpu(bf[2].length));
}
p += be16_to_cpu(dup->length);
......@@ -130,9 +140,11 @@ xfs_dir2_data_check(
* The linear search is crude but this is DEBUG code.
*/
dep = (xfs_dir2_data_entry_t *)p;
XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
XFS_WANT_CORRUPTED_RETURN(
!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
XFS_WANT_CORRUPTED_RETURN(
be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
(char *)dep - (char *)hdr);
count++;
lastfree = 0;
......@@ -148,27 +160,122 @@ xfs_dir2_data_check(
be32_to_cpu(lep[i].hashval) == hash)
break;
}
XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
}
p += xfs_dir2_data_entsize(dep->namelen);
}
/*
* Need to have seen all the entries and all the bestfree slots.
*/
XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
if (lep[i].address ==
cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
if (i > 0)
XFS_WANT_CORRUPTED_RETURN(
be32_to_cpu(lep[i].hashval) >=
be32_to_cpu(lep[i - 1].hashval));
}
XFS_WANT_CORRUPTED_RETURN(count ==
be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
}
return 0;
}
static void
xfs_dir2_data_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC);
block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;
if (!block_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
/*
* Readahead of the first block of the directory when it is opened is completely
* oblivious to the format of the directory. Hence we can either get a block
* format buffer or a data format buffer on readahead.
*/
static void
xfs_dir2_data_reada_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
bp->b_ops = &xfs_dir2_block_buf_ops;
bp->b_ops->verify_read(bp);
return;
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
xfs_dir2_data_verify(bp);
return;
default:
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
break;
}
}
static void
xfs_dir2_data_read_verify(
struct xfs_buf *bp)
{
xfs_dir2_data_verify(bp);
}
static void
xfs_dir2_data_write_verify(
struct xfs_buf *bp)
{
xfs_dir2_data_verify(bp);
}
const struct xfs_buf_ops xfs_dir2_data_buf_ops = {
.verify_read = xfs_dir2_data_read_verify,
.verify_write = xfs_dir2_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = {
.verify_read = xfs_dir2_data_reada_verify,
.verify_write = xfs_dir2_data_write_verify,
};
int
xfs_dir2_data_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mapped_bno,
struct xfs_buf **bpp)
{
return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
XFS_DATA_FORK, &xfs_dir2_data_buf_ops);
}
int
xfs_dir2_data_readahead(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
xfs_daddr_t mapped_bno)
{
return xfs_da_reada_buf(tp, dp, bno, mapped_bno,
XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops);
}
/*
* Given a data block and an unused entry from that block,
......@@ -409,10 +516,9 @@ xfs_dir2_data_init(
*/
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp,
XFS_DATA_FORK);
if (error)
return error;
bp->b_ops = &xfs_dir2_data_buf_ops;
/*
* Initialize the header.
......
......@@ -48,6 +48,83 @@ static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
int first, int last);
static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
static void
xfs_dir2_leaf_verify(
struct xfs_buf *bp,
__be16 magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_leaf_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->info.magic == magic;
if (!block_ok) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_dir2_leaf1_read_verify(
struct xfs_buf *bp)
{
xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
}
static void
xfs_dir2_leaf1_write_verify(
struct xfs_buf *bp)
{
xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
}
void
xfs_dir2_leafn_read_verify(
struct xfs_buf *bp)
{
xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
}
void
xfs_dir2_leafn_write_verify(
struct xfs_buf *bp)
{
xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
}
static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = {
.verify_read = xfs_dir2_leaf1_read_verify,
.verify_write = xfs_dir2_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = {
.verify_read = xfs_dir2_leafn_read_verify,
.verify_write = xfs_dir2_leafn_write_verify,
};
static int
xfs_dir2_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops);
}
int
xfs_dir2_leafn_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops);
}
/*
* Convert a block form directory to a leaf form directory.
......@@ -125,6 +202,7 @@ xfs_dir2_block_to_leaf(
/*
* Fix up the block header, make it a data block.
*/
dbp->b_ops = &xfs_dir2_data_buf_ops;
hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
if (needscan)
xfs_dir2_data_freescan(mp, hdr, &needlog);
......@@ -311,15 +389,11 @@ xfs_dir2_leaf_addname(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
/*
* Read the leaf block.
*/
error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
XFS_DATA_FORK);
if (error) {
error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
if (error)
return error;
}
ASSERT(lbp != NULL);
/*
* Look up the entry by hash value and name.
* We know it's not there, our caller has already done a lookup.
......@@ -494,22 +568,21 @@ xfs_dir2_leaf_addname(
hdr = dbp->b_addr;
bestsp[use_block] = hdr->bestfree[0].length;
grown = 1;
} else {
/*
* Already had space in some data block.
* Just read that one in.
*/
error = xfs_dir2_data_read(tp, dp,
xfs_dir2_db_to_da(mp, use_block),
-1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
}
hdr = dbp->b_addr;
grown = 0;
}
/*
* Point to the biggest freespace in our data block.
*/
......@@ -892,10 +965,9 @@ xfs_dir2_leaf_readbuf(
* Read the directory block starting at the first mapping.
*/
mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
error = xfs_dir2_data_read(NULL, dp, map->br_startoff,
map->br_blockcount >= mp->m_dirblkfsbs ?
XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
/*
* Should just skip over the data block instead of giving up.
......@@ -922,11 +994,11 @@ xfs_dir2_leaf_readbuf(
*/
if (i > mip->ra_current &&
map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
xfs_dir2_data_readahead(NULL, dp,
map[mip->ra_index].br_startoff + mip->ra_offset,
XFS_FSB_TO_DADDR(mp,
map[mip->ra_index].br_startblock +
mip->ra_offset));
mip->ra_current = i;
}
......@@ -935,10 +1007,9 @@ xfs_dir2_leaf_readbuf(
* use our mapping, but this is a very rare case.
*/
else if (i > mip->ra_current) {
xfs_dir2_data_readahead(NULL, dp,
map[mip->ra_index].br_startoff +
mip->ra_offset, -1);
mip->ra_current = i;
}
......@@ -1178,14 +1249,13 @@ xfs_dir2_leaf_init(
*/
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
XFS_DATA_FORK);
if (error)
return error;
leaf = bp->b_addr;
/*
* Initialize the header.
*/
leaf->hdr.info.magic = cpu_to_be16(magic);
leaf->hdr.info.forw = 0;
leaf->hdr.info.back = 0;
......@@ -1198,10 +1268,12 @@ xfs_dir2_leaf_init(
* the block.
*/
if (magic == XFS_DIR2_LEAF1_MAGIC) {
bp->b_ops = &xfs_dir2_leaf1_buf_ops;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
ltp->bestcount = 0;
xfs_dir2_leaf_log_tail(tp, bp);
} else
bp->b_ops = &xfs_dir2_leafn_buf_ops;
*bpp = bp;
return 0;
}
......@@ -1372,13 +1444,11 @@ xfs_dir2_leaf_lookup_int(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp);
if (error)
return error;
*lbpp = lbp;
leaf = lbp->b_addr;
xfs_dir2_leaf_check(dp, lbp);
......@@ -1409,14 +1479,13 @@ xfs_dir2_leaf_lookup_int(
if (newdb != curdb) {
if (dbp)
xfs_trans_brelse(tp, dbp);
error = xfs_dir2_data_read(tp, dp,
xfs_dir2_db_to_da(mp, newdb),
-1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
}
curdb = newdb;
}
/*
......@@ -1451,9 +1520,9 @@ xfs_dir2_leaf_lookup_int(
ASSERT(cidb != -1);
if (cidb != curdb) {
xfs_trans_brelse(tp, dbp);
error = xfs_dir2_data_read(tp, dp,
xfs_dir2_db_to_da(mp, cidb),
-1, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
......@@ -1738,10 +1807,9 @@ xfs_dir2_leaf_trim_data(
/*
* Read the offending data block. We need its buffer.
*/
error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp);
if (error)
return error;
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
......@@ -1864,10 +1932,9 @@ xfs_dir2_node_to_leaf(
/*
* Read the freespace block.
*/
error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp);
if (error)
return error;
free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(!free->hdr.firstdb);
......@@ -1890,7 +1957,10 @@ xfs_dir2_node_to_leaf(
xfs_dir2_leaf_compact(args, lbp);
else
xfs_dir2_leaf_log_header(tp, lbp);
lbp->b_ops = &xfs_dir2_leaf1_buf_ops;
leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
/*
* Set up the leaf tail from the freespace block.
*/
......
......@@ -55,6 +55,74 @@ static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
xfs_da_state_blk_t *fblk);
static void
xfs_dir2_free_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
int block_ok = 0;
block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC);
if (!block_ok) {
XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic",
XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_dir2_free_read_verify(
struct xfs_buf *bp)
{
xfs_dir2_free_verify(bp);
}
static void
xfs_dir2_free_write_verify(
struct xfs_buf *bp)
{
xfs_dir2_free_verify(bp);
}
static const struct xfs_buf_ops xfs_dir2_free_buf_ops = {
.verify_read = xfs_dir2_free_read_verify,
.verify_write = xfs_dir2_free_write_verify,
};
static int
__xfs_dir2_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir2_free_buf_ops);
}
int
xfs_dir2_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp);
}
static int
xfs_dir2_free_try_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp);
}
/*
* Log entries from a freespace block.
*/
......@@ -131,11 +199,12 @@ xfs_dir2_leaf_to_node(
/*
* Get the buffer for the new freespace block.
*/
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
XFS_DATA_FORK);
if (error)
return error;
fbp->b_ops = &xfs_dir2_free_buf_ops;
free = fbp->b_addr;
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
......@@ -157,7 +226,10 @@ xfs_dir2_leaf_to_node(
*to = cpu_to_be16(off);
}
free->hdr.nused = cpu_to_be32(n);
lbp->b_ops = &xfs_dir2_leafn_buf_ops;
leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
/*
* Log everything.
*/
......@@ -394,12 +466,10 @@ xfs_dir2_leafn_lookup_for_addname(
*/
if (curbp)
xfs_trans_brelse(tp, curbp);
error = xfs_dir2_free_read(tp, dp,
xfs_dir2_db_to_da(mp, newfdb),
&curbp);
if (error)
return error;
free = curbp->b_addr;
......@@ -534,9 +604,9 @@ xfs_dir2_leafn_lookup_for_entry(
ASSERT(state->extravalid);
curbp = state->extrablk.bp;
} else {
error = xfs_dir2_data_read(tp, dp,
xfs_dir2_db_to_da(mp, newdb),
-1, &curbp);
if (error)
return error;
}
......@@ -568,6 +638,7 @@ xfs_dir2_leafn_lookup_for_entry(
state->extrablk.index = (int)((char *)dep -
(char *)curbp->b_addr);
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
curbp->b_ops = &xfs_dir2_data_buf_ops;
if (cmp == XFS_CMP_EXACT)
return XFS_ERROR(EEXIST);
}
......@@ -582,6 +653,7 @@ xfs_dir2_leafn_lookup_for_entry(
state->extrablk.index = -1;
state->extrablk.blkno = curdb;
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
curbp->b_ops = &xfs_dir2_data_buf_ops;
} else {
/* If the curbp is not the CI match block, drop it */
if (state->extrablk.bp != curbp)
......@@ -825,6 +897,77 @@ xfs_dir2_leafn_rebalance(
}
}
static int
xfs_dir2_data_block_free(
xfs_da_args_t *args,
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_free *free,
xfs_dir2_db_t fdb,
int findex,
struct xfs_buf *fbp,
int longest)
{
struct xfs_trans *tp = args->trans;
int logfree = 0;
if (!hdr) {
/* One less used entry in the free table. */
be32_add_cpu(&free->hdr.nused, -1);
xfs_dir2_free_log_header(tp, fbp);
/*
* If this was the last entry in the table, we can trim the
* table size back. There might be other entries at the end
* referring to non-existent data blocks, get those too.
*/
if (findex == be32_to_cpu(free->hdr.nvalid) - 1) {
int i; /* free entry index */
for (i = findex - 1; i >= 0; i--) {
if (free->bests[i] != cpu_to_be16(NULLDATAOFF))
break;
}
free->hdr.nvalid = cpu_to_be32(i + 1);
logfree = 0;
} else {
/* Not the last entry, just punch it out. */
free->bests[findex] = cpu_to_be16(NULLDATAOFF);
logfree = 1;
}
/*
* If there are no useful entries left in the block,
* get rid of the block if we can.
*/
if (!free->hdr.nused) {
int error;
error = xfs_dir2_shrink_inode(args, fdb, fbp);
if (error == 0) {
fbp = NULL;
logfree = 0;
} else if (error != ENOSPC || args->total != 0)
return error;
/*
* It's possible to get ENOSPC if there is no
* space reservation. In this case someone
* else will eventually get rid of this block.
*/
}
} else {
/*
* Data block is not empty, just set the free entry to the new
* value.
*/
free->bests[findex] = cpu_to_be16(longest);
logfree = 1;
}
/* Log the free entry that changed, unless we got rid of it. */
if (logfree)
xfs_dir2_free_log_bests(tp, fbp, findex, findex);
return 0;
}
/*
* Remove an entry from a node directory.
* This removes the leaf entry and the data entry,
......@@ -908,17 +1051,16 @@ xfs_dir2_leafn_remove(
xfs_dir2_db_t fdb; /* freeblock block number */
int findex; /* index in freeblock entries */
xfs_dir2_free_t *free; /* freeblock structure */
/*
* Convert the data block number to a free block,
* read in the free block.
*/
fdb = xfs_dir2_db_to_fdb(mp, db);
error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb),
&fbp);
if (error)
return error;
free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(be32_to_cpu(free->hdr.firstdb) ==
......@@ -954,68 +1096,12 @@ xfs_dir2_leafn_remove(
* If we got rid of the data block, we can eliminate that entry
* in the free block.
*/
error = xfs_dir2_data_block_free(args, hdr, free,
fdb, findex, fbp, longest);
if (error)
return error;
}
xfs_dir2_leafn_check(dp, bp);
/*
* Return indication of whether this leaf block is empty enough
......@@ -1169,12 +1255,11 @@ xfs_dir2_leafn_toosmall(
/*
* Read the sibling leaf block.
*/
error = xfs_dir2_leafn_read(state->args->trans, state->args->dp,
blkno, -1, &bp);
if (error)
return error;
/*
* Count bytes in the two blocks combined.
*/
......@@ -1454,14 +1539,13 @@ xfs_dir2_node_addname_int(
* This should be really rare, so there's no reason
* to avoid it.
*/
error = xfs_dir2_free_try_read(tp, dp,
xfs_dir2_db_to_da(mp, fbno),
&fbp);
if (error)
return error;
if (!fbp)
continue;
free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = 0;
......@@ -1520,9 +1604,10 @@ xfs_dir2_node_addname_int(
* that was just allocated.
*/
fbno = xfs_dir2_db_to_fdb(mp, dbno);
error = xfs_dir2_free_try_read(tp, dp,
xfs_dir2_db_to_da(mp, fbno),
&fbp);
if (error)
return error;
/*
......@@ -1561,12 +1646,12 @@ xfs_dir2_node_addname_int(
/*
* Get a buffer for the new block.
*/
error = xfs_da_get_buf(tp, dp,
xfs_dir2_db_to_da(mp, fbno),
-1, &fbp, XFS_DATA_FORK);
if (error)
return error;
fbp->b_ops = &xfs_dir2_free_buf_ops;
/*
* Initialize the new block to be empty, and remember
......@@ -1630,8 +1715,8 @@ xfs_dir2_node_addname_int(
/*
* Read the data block in.
*/
error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno),
-1, &dbp);
if (error)
return error;
hdr = dbp->b_addr;
......@@ -1917,18 +2002,15 @@ xfs_dir2_node_trim_free(
/*
* Read the freespace block.
*/
error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
if (error)
return error;
/*
* There can be holes in freespace. If fo is a hole, there's
* nothing to do.
*/
if (!bp)
return 0;
free = bp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
/*
......
......@@ -30,6 +30,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
const unsigned char *name, int len);
/* xfs_dir2_block.c */
extern const struct xfs_buf_ops xfs_dir2_block_buf_ops;
extern int xfs_dir2_block_addname(struct xfs_da_args *args);
extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
xfs_off_t *offset, filldir_t filldir);
......@@ -41,10 +43,19 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
/* xfs_dir2_data.c */
#ifdef DEBUG
#define xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp);
#else
#define xfs_dir2_data_check(dp,bp)
#endif
extern const struct xfs_buf_ops xfs_dir2_data_buf_ops;
extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno);
extern struct xfs_dir2_data_free *
xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_data_unused *dup, int *loghead);
......@@ -66,6 +77,10 @@ extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
/* xfs_dir2_leaf.c */
extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops;
extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
......@@ -115,6 +130,8 @@ extern int xfs_dir2_node_removename(struct xfs_da_args *args);
extern int xfs_dir2_node_replace(struct xfs_da_args *args);
extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
int *rvalp);
extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, struct xfs_buf **bpp);
/* xfs_dir2_sf.c */
extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
......
......@@ -248,7 +248,59 @@ xfs_qm_init_dquot_blk(
xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
static void
xfs_dquot_buf_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
struct xfs_disk_dquot *ddq;
xfs_dqid_t id = 0;
int i;
/*
* On the first read of the buffer, verify that each dquot is valid.
* We don't know what the id of the dquot is supposed to be, just that
* they should be increasing monotonically within the buffer. If the
* first id is corrupt, then it will fail on the second dquot in the
* buffer so corruptions could point to the wrong dquot in this case.
*/
for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
int error;
ddq = &d[i].dd_diskdq;
if (i == 0)
id = be32_to_cpu(ddq->d_id);
error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
"xfs_dquot_read_verify");
if (error) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d);
xfs_buf_ioerror(bp, EFSCORRUPTED);
break;
}
}
}
static void
xfs_dquot_buf_read_verify(
struct xfs_buf *bp)
{
xfs_dquot_buf_verify(bp);
}
void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
{
xfs_dquot_buf_verify(bp);
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
/*
* Allocate a block and fill it with dquots.
......@@ -315,6 +367,7 @@ xfs_qm_dqalloc(
error = xfs_buf_geterror(bp);
if (error)
goto error1;
bp->b_ops = &xfs_dquot_buf_ops;
/*
* Make a chunk of dquots out of this buffer and log
......@@ -359,6 +412,51 @@ xfs_qm_dqalloc(
return (error);
}
STATIC int
xfs_qm_dqrepair(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_dquot *dqp,
xfs_dqid_t firstid,
struct xfs_buf **bpp)
{
int error;
struct xfs_disk_dquot *ddq;
struct xfs_dqblk *d;
int i;
/*
* Read the buffer without verification so we get the corrupted
* buffer returned to us. make sure we verify it on write, though.
*/
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen,
0, bpp, NULL);
if (error) {
ASSERT(*bpp == NULL);
return XFS_ERROR(error);
}
(*bpp)->b_ops = &xfs_dquot_buf_ops;
ASSERT(xfs_buf_islocked(*bpp));
d = (struct xfs_dqblk *)(*bpp)->b_addr;
/* Do the actual repair of dquots in this buffer */
for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
ddq = &d[i].dd_diskdq;
error = xfs_qm_dqcheck(mp, ddq, firstid + i,
dqp->dq_flags & XFS_DQ_ALLTYPES,
XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
if (error) {
/* repair failed, we're screwed */
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EIO);
}
}
return 0;
}
/*
* Maps a dquot to the buffer containing its on-disk version.
......@@ -378,7 +476,6 @@ xfs_qm_dqtobp(
xfs_buf_t *bp;
xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
xfs_mount_t *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
xfs_trans_t *tp = (tpp ? *tpp : NULL);
......@@ -439,33 +536,24 @@ xfs_qm_dqtobp(
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen,
0, &bp, &xfs_dquot_buf_ops);
if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
mp->m_quotainfo->qi_dqperchunk;
ASSERT(bp == NULL);
error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
}
if (error) {
ASSERT(bp == NULL);
return XFS_ERROR(error);
}
ASSERT(xfs_buf_islocked(bp));
*O_bpp = bp;
*O_ddpp = bp->b_addr + dqp->q_bufoffset;
return (0);
}
......@@ -920,7 +1008,7 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
if (error)
goto out_unlock;
......
......@@ -161,4 +161,6 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
return dqp;
}
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
#endif /* __XFS_DQUOT_H__ */
......@@ -29,6 +29,7 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
/*
* Note that we only accept fileids which are long enough rather than allow
......
......@@ -31,6 +31,8 @@
#include "xfs_error.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
......@@ -84,7 +86,7 @@ xfs_rw_ilock_demote(
* valid before the operation, it will be read from disk before
* being partially zeroed.
*/
int
xfs_iozero(
struct xfs_inode *ip, /* inode */
loff_t pos, /* offset in file */
......@@ -255,15 +257,14 @@ xfs_file_aio_read(
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((pos & target->bt_smask) || (size & target->bt_smask)) {
if (pos == i_size_read(inode))
return 0;
return -XFS_ERROR(EINVAL);
}
}
n = mp->m_super->s_maxbytes - iocb->ki_pos;
n = mp->m_super->s_maxbytes - pos;
if (n <= 0 || size == 0)
return 0;
......@@ -289,20 +290,21 @@ xfs_file_aio_read(
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
(iocb->ki_pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
ret = -filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
pos, -1);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
truncate_pagecache_range(VFS_I(ip), pos, -1);
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
trace_xfs_file_read(ip, size, pos, ioflags);
ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
......@@ -670,10 +672,11 @@ xfs_file_dio_aio_write(
goto out;
if (mapping->nrpages) {
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
pos, -1);
if (ret)
goto out;
truncate_pagecache_range(VFS_I(ip), pos, -1);
}
/*
......@@ -728,15 +731,16 @@ xfs_file_buffered_aio_write(
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_buffered_write(iocb, iovp, nr_segs,
pos, &iocb->ki_pos, count, ret);
pos, &iocb->ki_pos, count, 0);
/*
* if we just got an ENOSPC, flush the inode now we aren't holding any
* page locks and retry *once*
* If we just got an ENOSPC, try to write back all dirty inodes to
* convert delalloc space to free up some of the excess reserved
* metadata space.
*/
if (ret == -ENOSPC && !enospc) {
enospc = 1;
ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (!ret)
xfs_flush_inodes(ip->i_mount);
goto write_retry;
}
......@@ -889,7 +893,7 @@ xfs_dir_open(
*/
mode = xfs_ilock_map_shared(ip);
if (ip->i_d.di_nextents > 0)
xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
xfs_dir2_data_readahead(NULL, ip, 0, -1);
xfs_iunlock(ip, mode);
return 0;
}
......
......@@ -233,6 +233,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
......@@ -338,6 +339,35 @@ typedef struct xfs_error_injection {
} xfs_error_injection_t;
/*
* Speculative preallocation trimming.
*/
#define XFS_EOFBLOCKS_VERSION 1
struct xfs_eofblocks {
__u32 eof_version;
__u32 eof_flags;
uid_t eof_uid;
gid_t eof_gid;
prid_t eof_prid;
__u32 pad32;
__u64 eof_min_file_size;
__u64 pad64[12];
};
/* eof_flags values */
#define XFS_EOF_FLAGS_SYNC (1 << 0) /* sync/wait mode scan */
#define XFS_EOF_FLAGS_UID (1 << 1) /* filter by uid */
#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */
#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */
#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */
#define XFS_EOF_FLAGS_VALID \
(XFS_EOF_FLAGS_SYNC | \
XFS_EOF_FLAGS_UID | \
XFS_EOF_FLAGS_GID | \
XFS_EOF_FLAGS_PRID | \
XFS_EOF_FLAGS_MINFILESIZE)
/*
* The user-level Handle Request interface structure.
*/
......@@ -456,6 +486,7 @@ typedef struct xfs_handle {
/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
/*
* ioctl commands that replace IRIX syssgi()'s
......
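The new XFS_IOC_FREE_EOFBLOCKS ioctl is driven by the struct xfs_eofblocks defined above. A minimal userspace sketch of invoking it follows, assuming those definitions are visible through the installed <xfs/xfs_fs.h> header; the mount-point path and size filter are illustrative, not part of this series.

/*
 * Sketch: ask XFS to trim speculative post-EOF preallocation,
 * filtering by a minimum file size. The pad fields must be zero;
 * the kernel handler rejects non-zero padding.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs_fs.h>

int main(void)
{
	struct xfs_eofblocks eofb;
	int fd = open("/mnt/xfs", O_RDONLY);	/* illustrative path */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&eofb, 0, sizeof(eofb));
	eofb.eof_version = XFS_EOFBLOCKS_VERSION;
	eofb.eof_flags = XFS_EOF_FLAGS_SYNC | XFS_EOF_FLAGS_MINFILESIZE;
	eofb.eof_min_file_size = 1024 * 1024;	/* only files >= 1 MiB */

	if (ioctl(fd, XFS_IOC_FREE_EOFBLOCKS, &eofb) < 0)
		perror("XFS_IOC_FREE_EOFBLOCKS");
	close(fd);
	return 0;
}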
/*
* Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
/*
* note: all filemap functions return negative error codes. These
* need to be inverted before returning to the xfs core functions.
*/
void
xfs_tosspages(
xfs_inode_t *ip,
xfs_off_t first,
xfs_off_t last,
int fiopt)
{
/* can't toss partial tail pages, so mask them out */
last &= ~(PAGE_SIZE - 1);
truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
}
int
xfs_flushinval_pages(
xfs_inode_t *ip,
xfs_off_t first,
xfs_off_t last,
int fiopt)
{
struct address_space *mapping = VFS_I(ip)->i_mapping;
int ret = 0;
trace_xfs_pagecache_inval(ip, first, last);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
ret = filemap_write_and_wait_range(mapping, first,
last == -1 ? LLONG_MAX : last);
if (!ret)
truncate_inode_pages_range(mapping, first, last);
return -ret;
}
int
xfs_flush_pages(
xfs_inode_t *ip,
xfs_off_t first,
xfs_off_t last,
uint64_t flags,
int fiopt)
{
struct address_space *mapping = VFS_I(ip)->i_mapping;
int ret = 0;
int ret2;
xfs_iflags_clear(ip, XFS_ITRUNCATED);
ret = -filemap_fdatawrite_range(mapping, first,
last == -1 ? LLONG_MAX : last);
if (flags & XBF_ASYNC)
return ret;
ret2 = xfs_wait_on_pages(ip, first, last);
if (!ret)
ret = ret2;
return ret;
}
int
xfs_wait_on_pages(
xfs_inode_t *ip,
xfs_off_t first,
xfs_off_t last)
{
struct address_space *mapping = VFS_I(ip)->i_mapping;
if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
return -filemap_fdatawait_range(mapping, first,
last == -1 ? XFS_ISIZE(ip) - 1 : last);
}
return 0;
}
......@@ -97,7 +97,9 @@ xfs_fs_geometry(
(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
(xfs_sb_version_hasattr2(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
XFS_FSOP_GEOM_FLAGS_PROJID32 : 0);
geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
mp->m_sb.sb_logsectsize : BBSIZE;
geo->rtsectsize = mp->m_sb.sb_blocksize;
......@@ -112,18 +114,40 @@ xfs_fs_geometry(
return 0;
}
static struct xfs_buf *
xfs_growfs_get_hdr_buf(
struct xfs_mount *mp,
xfs_daddr_t blkno,
size_t numblks,
int flags,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
if (!bp)
return NULL;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
bp->b_bn = blkno;
bp->b_maps[0].bm_bn = blkno;
bp->b_ops = ops;
return bp;
}
static int
xfs_growfs_data_private(
xfs_mount_t *mp, /* mount point for filesystem */
xfs_growfs_data_t *in) /* growfs data input struct */
{
xfs_agf_t *agf;
struct xfs_agfl *agfl;
xfs_agi_t *agi;
xfs_agnumber_t agno;
xfs_extlen_t agsize;
xfs_extlen_t tmpsize;
xfs_alloc_rec_t *arec;
struct xfs_btree_block *block;
xfs_buf_t *bp;
int bucket;
int dpct;
......@@ -146,9 +170,14 @@ xfs_growfs_data_private(
dpct = pct - mp->m_sb.sb_imax_pct;
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
XFS_FSS_TO_BB(mp, 1), 0);
XFS_FSS_TO_BB(mp, 1), 0, NULL);
if (!bp)
return EIO;
if (bp->b_error) {
int error = bp->b_error;
xfs_buf_relse(bp);
return error;
}
xfs_buf_relse(bp);
new = nb; /* use new as a temporary here */
......@@ -186,17 +215,18 @@ xfs_growfs_data_private(
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
/*
* AG freelist header block
* AG freespace header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0);
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agf_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
agf = XFS_BUF_TO_AGF(bp);
memset(agf, 0, mp->m_sb.sb_sectsize);
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
agf->agf_seqno = cpu_to_be32(agno);
......@@ -222,18 +252,40 @@ xfs_growfs_data_private(
if (error)
goto error0;
/*
* AG freelist header block
*/
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agfl_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
agfl = XFS_BUF_TO_AGFL(bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
goto error0;
/*
* AG inode header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0);
XFS_FSS_TO_BB(mp, 1), 0,
&xfs_agi_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
agi = XFS_BUF_TO_AGI(bp);
memset(agi, 0, mp->m_sb.sb_sectsize);
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
agi->agi_seqno = cpu_to_be32(agno);
......@@ -254,24 +306,22 @@ xfs_growfs_data_private(
/*
* BNO btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_allocbt_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
block->bb_level = 0;
block->bb_numrecs = cpu_to_be16(1);
block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
......@@ -280,25 +330,22 @@ xfs_growfs_data_private(
/*
* CNT btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_allocbt_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
block->bb_level = 0;
block->bb_numrecs = cpu_to_be16(1);
block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
nfree += be32_to_cpu(arec->ar_blockcount);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
......@@ -307,20 +354,17 @@ xfs_growfs_data_private(
/*
* INO btree root block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
bp = xfs_growfs_get_hdr_buf(mp,
XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize), 0);
BTOBB(mp->m_sb.sb_blocksize), 0,
&xfs_inobt_buf_ops);
if (!bp) {
error = ENOMEM;
goto error0;
}
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
block->bb_level = 0;
block->bb_numrecs = 0;
block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
if (error)
......@@ -408,14 +452,16 @@ xfs_growfs_data_private(
if (agno < oagcount) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp);
XFS_FSS_TO_BB(mp, 1), 0, &bp,
&xfs_sb_buf_ops);
} else {
bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0);
if (bp)
if (bp) {
bp->b_ops = &xfs_sb_buf_ops;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
else
} else
error = ENOMEM;
}
......@@ -426,6 +472,7 @@ xfs_growfs_data_private(
break;
}
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS);
/*
* If we get an error writing out the alternate superblocks,
* just issue a warning and continue. The real work is
......
......@@ -21,7 +21,8 @@
/*
* Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
* other XFS code uses these values. Times are measured in centisecs (i.e.
* 100ths of a second).
* 100ths of a second) with the exception of eofb_timer, which is measured in
* seconds.
*/
xfs_param_t xfs_params = {
/* MIN DFLT MAX */
......@@ -40,4 +41,5 @@ xfs_param_t xfs_params = {
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 30*100, 3600*100},
.eofb_timer = { 1, 300, 3600*24},
};
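For illustration, a seconds-based tunable like eofb_timer would feed the delayed-work machinery roughly as sketched below; this is a simplified sketch built on the stock queue_delayed_work()/msecs_to_jiffies() kernel helpers, not the literal scheduling code in xfs_icache.c.

/*
 * Sketch: reschedule the background EOF-blocks scan from eofb_timer,
 * which is measured in seconds, unlike the centisec-based tunables
 * above. m_eofblocks_work and m_eofblocks_workqueue are the mount
 * fields added elsewhere in this series.
 */
static void
xfs_queue_eofblocks_sketch(
	struct xfs_mount	*mp)
{
	queue_delayed_work(mp->m_eofblocks_workqueue,
			   &mp->m_eofblocks_work,
			   msecs_to_jiffies(xfs_eofb_secs * 1000));
}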
......@@ -200,7 +200,8 @@ xfs_ialloc_inode_init(
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize * blks_per_cluster, 0);
mp->m_bsize * blks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
return ENOMEM;
/*
......@@ -210,6 +211,7 @@ xfs_ialloc_inode_init(
* to log a whole cluster of inodes instead of all the
* individual transactions causing a lot of log traffic.
*/
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
for (i = 0; i < ninodes; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
......@@ -877,9 +879,9 @@ xfs_dialloc_ag(
* This function is designed to be called twice if it has to do an allocation
* to make more free inodes. On the first call, *IO_agbp should be set to NULL.
* If an inode is available without having to performn an allocation, an inode
* number is returned. In this case, *IO_agbp would be NULL. If an allocation
* needes to be done, xfs_dialloc would return the current AGI buffer in
* *IO_agbp. The caller should then commit the current transaction, allocate a
* number is returned. In this case, *IO_agbp is set to NULL. If an allocation
* needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
* The caller should then commit the current transaction, allocate a
* new transaction, and call xfs_dialloc() again, passing in the previous value
* of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI
* buffer is locked across the two calls, the second call is guaranteed to have
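A sketch of the two-call convention described above; error handling and the commit/re-allocate step are abbreviated, and roll_transaction() is a hypothetical stand-in for the caller's real transaction handling (the actual caller is xfs_ialloc()'s path).

	struct xfs_buf	*agibp = NULL;	/* must be NULL on the first call */
	xfs_ino_t	ino;
	int		error;

	error = xfs_dialloc(tp, parent_ino, mode, okalloc, &agibp, &ino);
	if (!error && agibp != NULL) {
		/*
		 * No inode was available: xfs_dialloc() handed back the
		 * locked AGI buffer instead. Commit, start a new
		 * transaction and call again with the same buffer, so
		 * the AGI stays locked across both calls.
		 */
		tp = roll_transaction(tp);	/* hypothetical helper */
		error = xfs_dialloc(tp, parent_ino, mode, okalloc,
				    &agibp, &ino);
	}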
......@@ -1472,6 +1474,57 @@ xfs_check_agi_unlinked(
#define xfs_check_agi_unlinked(agi)
#endif
static void
xfs_agi_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
int agi_ok;
/*
* Validate the magic number of the agi block.
*/
agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
if (bp->b_pag)
agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) ==
bp->b_pag->pag_agno;
if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
XFS_RANDOM_IALLOC_READ_AGI))) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
xfs_check_agi_unlinked(agi);
}
static void
xfs_agi_read_verify(
struct xfs_buf *bp)
{
xfs_agi_verify(bp);
}
static void
xfs_agi_write_verify(
struct xfs_buf *bp)
{
xfs_agi_verify(bp);
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
/*
* Read in the allocation group header (inode allocation section)
*/
......@@ -1482,38 +1535,18 @@ xfs_read_agi(
xfs_agnumber_t agno, /* allocation group number */
struct xfs_buf **bpp) /* allocation group hdr buf */
{
struct xfs_agi *agi; /* allocation group header */
int agi_ok; /* agi is consistent */
int error;
ASSERT(agno != NULLAGNUMBER);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, bpp);
XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);
if (error)
return error;
ASSERT(!xfs_buf_geterror(*bpp));
agi = XFS_BUF_TO_AGI(*bpp);
/*
* Validate the magic number of the agi block.
*/
agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
be32_to_cpu(agi->agi_seqno) == agno;
if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
XFS_RANDOM_IALLOC_READ_AGI))) {
XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
mp, agi);
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
xfs_buf_set_ref(*bpp, XFS_AGI_REF);
xfs_check_agi_unlinked(agi);
return 0;
}
......
......@@ -147,7 +147,9 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,
/*
* Get the data from the pointed-to record.
*/
extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
xfs_inobt_rec_incore_t *rec, int *stat);
extern const struct xfs_buf_ops xfs_agi_buf_ops;
#endif /* __XFS_IALLOC_H__ */
......@@ -33,6 +33,7 @@
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
STATIC int
......@@ -181,6 +182,59 @@ xfs_inobt_key_diff(
cur->bc_rec.i.ir_startino;
}
void
xfs_inobt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
unsigned int level;
int sblock_ok; /* block passes checks */
/* magic number and level verification */
level = be16_to_cpu(block->bb_level);
sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
level < mp->m_in_maxlevels;
/* numrecs verification */
sblock_ok = sblock_ok &&
be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
/* sibling pointer verification */
sblock_ok = sblock_ok &&
(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_leftsib &&
(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
block->bb_u.s.bb_rightsib;
if (!sblock_ok) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
xfs_buf_ioerror(bp, EFSCORRUPTED);
}
}
static void
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
xfs_inobt_verify(bp);
}
static void
xfs_inobt_write_verify(
struct xfs_buf *bp)
{
xfs_inobt_verify(bp);
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
#ifdef DEBUG
STATIC int
xfs_inobt_keys_inorder(
......@@ -218,6 +272,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
#ifdef DEBUG
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
......
......@@ -109,4 +109,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
extern const struct xfs_buf_ops xfs_inobt_buf_ops;
#endif /* __XFS_IALLOC_BTREE_H__ */
......@@ -24,28 +24,30 @@ struct xfs_perag;
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
int xfs_quiesce_data(struct xfs_mount *mp);
void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
void xfs_reclaim_worker(struct work_struct *work);
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
int xfs_reclaim_inodes_count(struct xfs_mount *mp);
void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
void xfs_eofblocks_worker(struct work_struct *);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags);
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
int flags, void *args),
int flags, void *args);
int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
int flags, void *args),
int flags, void *args, int tag);
#endif
......@@ -496,11 +496,10 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
(((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
((pip)->i_d.di_mode & S_ISGID))
/*
* xfs_iget.c prototypes.
* xfs_inode.c prototypes.
*/
int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
uint, uint, xfs_inode_t **);
void xfs_ilock(xfs_inode_t *, uint);
int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
......@@ -508,11 +507,6 @@ void xfs_ilock_demote(xfs_inode_t *, uint);
int xfs_isilocked(xfs_inode_t *, uint);
uint xfs_ilock_map_shared(xfs_inode_t *);
void xfs_iunlock_map_shared(xfs_inode_t *, uint);
void xfs_inode_free(struct xfs_inode *ip);
/*
* xfs_inode.c prototypes.
*/
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
xfs_nlink_t, xfs_dev_t, prid_t, int,
struct xfs_buf **, xfs_inode_t **);
......@@ -591,6 +585,7 @@ void xfs_iext_irec_compact(xfs_ifork_t *);
void xfs_iext_irec_compact_pages(xfs_ifork_t *);
void xfs_iext_irec_compact_full(xfs_ifork_t *);
void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
bool xfs_can_free_eofblocks(struct xfs_inode *, bool);
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
......@@ -603,5 +598,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
#endif /* __XFS_INODE_H__ */
......@@ -42,6 +42,7 @@
#include "xfs_inode_item.h"
#include "xfs_export.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include <linux/capability.h>
#include <linux/dcache.h>
......@@ -1602,6 +1603,26 @@ xfs_file_ioctl(
error = xfs_errortag_clearall(mp, 1);
return -error;
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_eofblocks eofb;
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
if (eofb.eof_version != XFS_EOFBLOCKS_VERSION)
return -XFS_ERROR(EINVAL);
if (eofb.eof_flags & ~XFS_EOF_FLAGS_VALID)
return -XFS_ERROR(EINVAL);
if (memchr_inv(&eofb.pad32, 0, sizeof(eofb.pad32)) ||
memchr_inv(eofb.pad64, 0, sizeof(eofb.pad64)))
return -XFS_ERROR(EINVAL);
error = xfs_icache_free_eofblocks(mp, &eofb);
return -error;
}
default:
return -ENOTTY;
}
......
......@@ -41,6 +41,7 @@
#include "xfs_utils.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
......@@ -373,7 +374,7 @@ xfs_iomap_write_delay(
xfs_extlen_t extsz;
int nimaps;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
int prealloc, flushed = 0;
int prealloc;
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
......@@ -434,31 +435,29 @@ xfs_iomap_write_delay(
}
/*
* If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For
* ENOSPC, * flush all other inodes with delalloc blocks to free up
* some of the excess reserved metadata space. For both cases, retry
* If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry
* without EOF preallocation.
*/
if (nimaps == 0) {
trace_xfs_delalloc_enospc(ip, offset, count);
if (flushed)
return XFS_ERROR(error ? error : ENOSPC);
if (error == ENOSPC) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_flush_inodes(ip);
xfs_ilock(ip, XFS_ILOCK_EXCL);
}
flushed = 1;
error = 0;
if (prealloc) {
prealloc = 0;
error = 0;
goto retry;
}
return XFS_ERROR(error ? error : ENOSPC);
}
if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_alert_fsblock_zero(ip, &imap[0]);
/*
* Tag the inode as speculatively preallocated so we can reclaim this
* space on demand, if necessary.
*/
if (prealloc)
xfs_inode_set_eofblocks_tag(ip);
*ret_imap = imap[0];
return 0;
}
......
......@@ -38,6 +38,7 @@
#include "xfs_vnodeops.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include <linux/capability.h>
#include <linux/xattr.h>
......@@ -779,8 +780,8 @@ xfs_setattr_size(
* care about here.
*/
if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
FI_NONE);
error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
goto out_unlock;
}
......@@ -854,6 +855,9 @@ xfs_setattr_size(
* and do not wait the usual (long) time for writeout.
*/
xfs_iflags_set(ip, XFS_ITRUNCATED);
/* A truncate down always removes post-EOF blocks. */
xfs_inode_clear_eofblocks_tag(ip);
}
if (mask & ATTR_CTIME) {
......
......@@ -34,6 +34,7 @@
#include "xfs_error.h"
#include "xfs_btree.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
STATIC int
xfs_internal_inum(
......@@ -395,7 +396,8 @@ xfs_bulkstat(
if (xfs_inobt_maskn(chunkidx, nicluster)
& ~r.ir_free)
xfs_btree_reada_bufs(mp, agno,
agbno, nbcluster);
agbno, nbcluster,
&xfs_inode_buf_ops);
}
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
......
......@@ -44,6 +44,7 @@
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/file.h>
......@@ -118,6 +119,7 @@
#define xfs_rotorstep xfs_params.rotorstep.val
#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
#define xfs_eofb_secs xfs_params.eofb_timer.val
#define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid)
......
......@@ -181,5 +181,9 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
void xfs_log_worker(struct work_struct *work);
void xfs_log_quiesce(struct xfs_mount *mp);
#endif
#endif /* __XFS_LOG_H__ */
......@@ -139,7 +139,6 @@ static inline uint xlog_get_client_id(__be32 i)
/*
* Flags for log structure
*/
#define XLOG_CHKSUM_MISMATCH 0x1 /* used only during recovery */
#define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */
#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
......@@ -291,7 +290,7 @@ typedef struct xlog_rec_header {
__be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */
__be64 h_lsn; /* lsn of this LR : 8 */
__be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */
__be32 h_chksum; /* may not be used; non-zero if used : 4 */
__le32 h_crc; /* crc of log record : 4 */
__be32 h_prev_block; /* block number to previous LR : 4 */
__be32 h_num_logops; /* number of log operations in this LR : 4 */
__be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
......@@ -495,6 +494,7 @@ struct xlog {
struct xfs_buf *l_xbuf; /* extra buffer for log
* wrapping */
struct xfs_buftarg *l_targ; /* buftarg of log */
struct delayed_work l_work; /* background flush work */
uint l_flags;
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
struct list_head *l_buf_cancel_table;
......@@ -554,11 +554,9 @@ xlog_recover(
extern int
xlog_recover_finish(
struct xlog *log);
extern void
xlog_pack_data(
struct xlog *log,
struct xlog_in_core *iclog,
int);
extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
char *dp, int size);
extern kmem_zone_t *xfs_log_ticket_zone;
struct xlog_ticket *
......
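The h_crc field and xlog_cksum() prototype above replace the old h_chksum scheme. As a rough illustration, a CRC32c over a log record can be computed with the kernel's crc32c() helper (note the <linux/crc32c.h> include added earlier in this series); this is a simplified sketch, not the actual xlog_cksum() implementation, which skips h_crc without modifying the header and also covers the cycle data.

/*
 * Sketch: checksum a log record header plus payload with crc32c().
 * XFS_CRC_SEED comes from the new CRC infrastructure (xfs_cksum.h).
 * For simplicity the sketch zeroes h_crc before the calculation;
 * the caller stores the returned value there afterwards.
 */
static __le32
xlog_cksum_sketch(
	struct xlog_rec_header	*rhead,
	char			*dp,
	int			size)
{
	__uint32_t		crc;

	rhead->h_crc = 0;
	crc = crc32c(XFS_CRC_SEED, rhead, sizeof(*rhead));
	crc = crc32c(crc, dp, size);
	return cpu_to_le32(crc);
}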
......@@ -51,8 +51,6 @@ typedef struct xfs_trans_reservations {
#else /* __KERNEL__ */
#include "xfs_sync.h"
struct xlog;
struct xfs_inode;
struct xfs_mru_cache;
......@@ -197,9 +195,9 @@ typedef struct xfs_mount {
struct mutex m_icsb_mutex; /* balancer sync lock */
#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_sync_work; /* background sync work */
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct work_struct m_flush_work; /* background inode flush */
struct delayed_work m_eofblocks_work; /* background eof blocks
trimming */
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
struct shrinker m_inode_shrink; /* inode reclaim shrinker */
......@@ -209,6 +207,9 @@ typedef struct xfs_mount {
struct workqueue_struct *m_data_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
struct workqueue_struct *m_cil_workqueue;
struct workqueue_struct *m_reclaim_workqueue;
struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
} xfs_mount_t;
/*
......@@ -387,7 +388,9 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
xfs_agnumber_t *);
extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *);
extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
extern const struct xfs_buf_ops xfs_sb_buf_ops;
#endif /* __XFS_MOUNT_H__ */
......@@ -40,6 +40,7 @@
#include "xfs_utils.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
/*
* The global quota manager. There is only one of these for the entire
......@@ -891,7 +892,8 @@ xfs_qm_dqiter_bufs(
while (blkcnt--) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, bno),
mp->m_quotainfo->qi_dqchunklen, 0, &bp);
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
&xfs_dquot_buf_ops);
if (error)
break;
......@@ -978,7 +980,8 @@ xfs_qm_dqiterate(
while (rablkcnt--) {
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen);
mp->m_quotainfo->qi_dqchunklen,
NULL);
rablkno++;
}
}
......@@ -1453,7 +1456,7 @@ xfs_qm_dqreclaim_one(
int error;
if (!xfs_dqlock_nowait(dqp))
goto out_busy;
goto out_move_tail;
/*
* This dquot has acquired a reference in the meantime remove it from
......@@ -1476,7 +1479,7 @@ xfs_qm_dqreclaim_one(
* getting flushed to disk, we don't want to reclaim it.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_busy;
goto out_unlock_move_tail;
if (XFS_DQ_IS_DIRTY(dqp)) {
struct xfs_buf *bp = NULL;
......@@ -1487,7 +1490,7 @@ xfs_qm_dqreclaim_one(
if (error) {
xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp);
goto out_busy;
goto out_unlock_move_tail;
}
xfs_buf_delwri_queue(bp, buffer_list);
......@@ -1496,7 +1499,7 @@ xfs_qm_dqreclaim_one(
* Give the dquot another try on the freelist, as the
* flushing will take some time.
*/
goto out_busy;
goto out_unlock_move_tail;
}
xfs_dqfunlock(dqp);
......@@ -1515,14 +1518,13 @@ xfs_qm_dqreclaim_one(
XFS_STATS_INC(xs_qm_dqreclaims);
return;
out_busy:
xfs_dqunlock(dqp);
/*
* Move the dquot to the tail of the list so that we don't spin on it.
*/
out_unlock_move_tail:
xfs_dqunlock(dqp);
out_move_tail:
list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(xs_qm_dqreclaim_misses);
}
......
......@@ -40,6 +40,7 @@
#include "xfs_utils.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
......@@ -845,7 +846,8 @@ STATIC int
xfs_dqrele_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
int flags)
int flags,
void *args)
{
/* skip quota inodes */
if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
......@@ -881,5 +883,5 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
}
......@@ -74,6 +74,7 @@ struct block_device;
extern __uint64_t xfs_max_file_offset(unsigned int);
extern void xfs_flush_inodes(struct xfs_mount *mp);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
......