提交 5619a693 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (69 commits)
  xfs: add AIL pushing tracepoints
  xfs: put in missed fix for merge problem
  xfs: do not flush data workqueues in xfs_flush_buftarg
  xfs: remove XFS_bflush
  xfs: remove xfs_buf_target_name
  xfs: use xfs_ioerror_alert in xfs_buf_iodone_callbacks
  xfs: clean up xfs_ioerror_alert
  xfs: clean up buffer allocation
  xfs: remove buffers from the delwri list in xfs_buf_stale
  xfs: remove XFS_BUF_STALE and XFS_BUF_SUPER_STALE
  xfs: remove XFS_BUF_SET_VTYPE and XFS_BUF_SET_VTYPE_REF
  xfs: remove XFS_BUF_FINISH_IOWAIT
  xfs: remove xfs_get_buftarg_list
  xfs: fix buffer flushing during unmount
  xfs: optimize fsync on directories
  xfs: reduce the number of log forces from tail pushing
  xfs: Don't allocate new buffers on every call to _xfs_buf_find
  xfs: simplify xfs_trans_ijoin* again
  xfs: unlock the inode before log force in xfs_change_file_space
  xfs: unlock the inode before log force in xfs_fs_nfs_commit_metadata
  ...
......@@ -452,7 +452,7 @@ xfs_alloc_read_agfl(
if (error)
return error;
ASSERT(!xfs_buf_geterror(bp));
XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
xfs_buf_set_ref(bp, XFS_AGFL_REF);
*bpp = bp;
return 0;
}
......@@ -2139,7 +2139,7 @@ xfs_read_agf(
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
xfs_buf_set_ref(*bpp, XFS_AGF_REF);
return 0;
}
......
......@@ -38,40 +38,6 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
/*
* Prime number of hash buckets since address is used as the key.
*/
#define NVSYNC 37
#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
static wait_queue_head_t xfs_ioend_wq[NVSYNC];
void __init
xfs_ioend_init(void)
{
int i;
for (i = 0; i < NVSYNC; i++)
init_waitqueue_head(&xfs_ioend_wq[i]);
}
void
xfs_ioend_wait(
xfs_inode_t *ip)
{
wait_queue_head_t *wq = to_ioend_wq(ip);
wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
}
STATIC void
xfs_ioend_wake(
xfs_inode_t *ip)
{
if (atomic_dec_and_test(&ip->i_iocount))
wake_up(to_ioend_wq(ip));
}
void
xfs_count_page_state(
struct page *page,
......@@ -115,25 +81,20 @@ xfs_destroy_ioend(
xfs_ioend_t *ioend)
{
struct buffer_head *bh, *next;
struct xfs_inode *ip = XFS_I(ioend->io_inode);
for (bh = ioend->io_buffer_head; bh; bh = next) {
next = bh->b_private;
bh->b_end_io(bh, !ioend->io_error);
}
/*
* Volume managers supporting multiple paths can send back ENODEV
* when the final path disappears. In this case continuing to fill
* the page cache with dirty data which cannot be written out is
* evil, so prevent that.
*/
if (unlikely(ioend->io_error == -ENODEV)) {
xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
__FILE__, __LINE__);
if (ioend->io_iocb) {
if (ioend->io_isasync) {
aio_complete(ioend->io_iocb, ioend->io_error ?
ioend->io_error : ioend->io_result, 0);
}
inode_dio_done(ioend->io_inode);
}
xfs_ioend_wake(ip);
mempool_free(ioend, xfs_ioend_pool);
}
......@@ -155,6 +116,15 @@ xfs_ioend_new_eof(
return isize > ip->i_d.di_size ? isize : 0;
}
/*
* Fast and loose check if this write could update the on-disk inode size.
*/
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
return ioend->io_offset + ioend->io_size >
XFS_I(ioend->io_inode)->i_d.di_size;
}
/*
* Update on-disk file size now that data has been written to disk. The
* current in-memory file size is i_size. If a write is beyond eof i_new_size
......@@ -173,9 +143,6 @@ xfs_setfilesize(
xfs_inode_t *ip = XFS_I(ioend->io_inode);
xfs_fsize_t isize;
if (unlikely(ioend->io_error))
return 0;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
return EAGAIN;
......@@ -192,6 +159,9 @@ xfs_setfilesize(
/*
* Schedule IO completion handling on the final put of an ioend.
*
* If there is no work to do we might as well call it a day and free the
* ioend right now.
*/
STATIC void
xfs_finish_ioend(
......@@ -200,8 +170,10 @@ xfs_finish_ioend(
if (atomic_dec_and_test(&ioend->io_remaining)) {
if (ioend->io_type == IO_UNWRITTEN)
queue_work(xfsconvertd_workqueue, &ioend->io_work);
else
else if (xfs_ioend_is_append(ioend))
queue_work(xfsdatad_workqueue, &ioend->io_work);
else
xfs_destroy_ioend(ioend);
}
}
......@@ -216,17 +188,24 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
goto done;
}
if (ioend->io_error)
goto done;
/*
* For unwritten extents we need to issue transactions to convert a
* range to normal written extens after the data I/O has finished.
*/
if (ioend->io_type == IO_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
if (ioend->io_type == IO_UNWRITTEN) {
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
if (error)
ioend->io_error = error;
if (error) {
ioend->io_error = -error;
goto done;
}
}
/*
......@@ -236,6 +215,7 @@ xfs_end_io(
error = xfs_setfilesize(ioend);
ASSERT(!error || error == EAGAIN);
done:
/*
* If we didn't complete processing of the ioend, requeue it to the
* tail of the workqueue for another attempt later. Otherwise destroy
......@@ -247,8 +227,6 @@ xfs_end_io(
/* ensure we don't spin on blocked ioends */
delay(1);
} else {
if (ioend->io_iocb)
aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
}
}
......@@ -285,13 +263,13 @@ xfs_alloc_ioend(
* all the I/O from calling the completion routine too early.
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_isasync = 0;
ioend->io_error = 0;
ioend->io_list = NULL;
ioend->io_type = type;
ioend->io_inode = inode;
ioend->io_buffer_head = NULL;
ioend->io_buffer_tail = NULL;
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
ioend->io_iocb = NULL;
......@@ -337,8 +315,8 @@ xfs_map_blocks(
count = mp->m_maxioffset - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
bmapi_flags, NULL, 0, imap, &nimaps, NULL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
imap, &nimaps, bmapi_flags);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
......@@ -551,7 +529,6 @@ xfs_cancel_ioend(
unlock_buffer(bh);
} while ((bh = next_bh) != NULL);
xfs_ioend_wake(XFS_I(ioend->io_inode));
mempool_free(ioend, xfs_ioend_pool);
} while ((ioend = next) != NULL);
}
......@@ -1161,8 +1138,8 @@ __xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
goto out_unlock;
......@@ -1300,7 +1277,6 @@ xfs_end_io_direct_write(
bool is_async)
{
struct xfs_ioend *ioend = iocb->private;
struct inode *inode = ioend->io_inode;
/*
* blockdev_direct_IO can return an error even after the I/O
......@@ -1311,28 +1287,17 @@ xfs_end_io_direct_write(
ioend->io_offset = offset;
ioend->io_size = size;
ioend->io_iocb = iocb;
ioend->io_result = ret;
if (private && size > 0)
ioend->io_type = IO_UNWRITTEN;
if (is_async) {
/*
* If we are converting an unwritten extent we need to delay
* the AIO completion until after the unwrittent extent
* conversion has completed, otherwise do it ASAP.
*/
if (ioend->io_type == IO_UNWRITTEN) {
ioend->io_iocb = iocb;
ioend->io_result = ret;
} else {
aio_complete(iocb, ret, 0);
}
ioend->io_isasync = 1;
xfs_finish_ioend(ioend);
} else {
xfs_finish_ioend_sync(ioend);
}
/* XXX: probably should move into the real I/O completion handler */
inode_dio_done(inode);
}
STATIC ssize_t
......
......@@ -47,6 +47,7 @@ typedef struct xfs_ioend {
unsigned int io_type; /* delalloc / unwritten */
int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */
unsigned int io_isasync : 1; /* needs aio_complete */
struct inode *io_inode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
......@@ -60,9 +61,6 @@ typedef struct xfs_ioend {
extern const struct address_space_operations xfs_address_space_operations;
extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
extern void xfs_ioend_init(void);
extern void xfs_ioend_wait(struct xfs_inode *);
extern void xfs_count_page_state(struct page *, int *, int *);
#endif /* __XFS_AOPS_H__ */
......@@ -319,7 +319,7 @@ xfs_attr_set_int(
return (error);
}
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* If the attribute list is non-existent or a shortform list,
......@@ -389,7 +389,7 @@ xfs_attr_set_int(
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* Commit the leaf transformation. We'll need another (linked)
......@@ -537,7 +537,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
* No need to make quota reservations here. We expect to release some
* blocks not allocate in the common case.
*/
xfs_trans_ijoin(args.trans, dp);
xfs_trans_ijoin(args.trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
......@@ -809,7 +809,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
* No need to make quota reservations here. We expect to release some
* blocks, not allocate, in the common case.
*/
xfs_trans_ijoin(trans, dp);
xfs_trans_ijoin(trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
......@@ -823,18 +823,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error)
goto out;
/*
* Signal synchronous inactive transactions unless this is a
* synchronous mount filesystem in which case we know that we're here
* because we've been called out of xfs_inactive which means that the
* last reference is gone and the unlink transaction has already hit
* the disk so async inactive transactions are safe.
*/
if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
if (dp->i_d.di_anextents > 0)
xfs_trans_set_sync(trans);
}
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out;
......@@ -973,7 +961,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the current trans (including the inode) and start
......@@ -1075,7 +1063,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
......@@ -1149,7 +1137,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
return(0);
......@@ -1303,7 +1291,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the node conversion and start the next
......@@ -1340,7 +1328,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
......@@ -1452,7 +1440,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
}
/*
......@@ -1584,7 +1572,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the Btree join operation and start a new trans.
......@@ -1635,7 +1623,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_brelse(args->trans, bp);
}
......@@ -1975,10 +1963,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
lblkno = args->rmtblkno;
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
NULL, 0, map, &nmap, NULL);
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
ASSERT(nmap >= 1);
......@@ -2052,10 +2039,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
XFS_BMAPI_WRITE,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (!error) {
......@@ -2074,7 +2060,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, dp);
xfs_trans_ijoin(args->trans, dp, 0);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
......@@ -2104,14 +2090,11 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
NULL);
if (error) {
error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
}
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
......@@ -2121,16 +2104,17 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
XBF_LOCK | XBF_DONT_BLOCK);
ASSERT(!xfs_buf_geterror(bp));
if (!bp)
return ENOMEM;
tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
XFS_BUF_SIZE(bp);
xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
if (tmp < XFS_BUF_SIZE(bp))
xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
return (error);
}
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
if (error)
return error;
src += tmp;
valuelen -= tmp;
......@@ -2166,16 +2150,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
/*
* Try to remember where we decided to put the value.
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, 0, &map, &nmap,
args->flist);
if (error) {
error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
args->rmtblkcnt, &map, &nmap,
XFS_BMAPI_ATTRFORK);
if (error)
return(error);
}
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
......@@ -2188,8 +2168,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
*/
bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
if (bp) {
XFS_BUF_STALE(bp);
XFS_BUF_UNDELAYWRITE(bp);
xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
......@@ -2227,7 +2206,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
xfs_trans_ijoin(args->trans, args->dp);
xfs_trans_ijoin(args->trans, args->dp, 0);
/*
* Close out trans and start the next one in the chain.
......
......@@ -2926,9 +2926,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
* Try to remember where we decided to put the value.
*/
nmap = 1;
error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
NULL, 0, &map, &nmap, NULL);
error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
return(error);
}
......@@ -2948,6 +2947,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
bp = xfs_trans_get_buf(*trans,
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, XBF_LOCK);
if (!bp)
return ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
......
此差异已折叠。
......@@ -62,27 +62,23 @@ typedef struct xfs_bmap_free
#define XFS_BMAP_MAX_NMAP 4
/*
* Flags for xfs_bmapi
* Flags for xfs_bmapi_*
*/
#define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */
#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
#define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */
#define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */
#define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */
#define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */
#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */
/* combine contig. space */
#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
#define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */
/*
* unwritten extent conversion - this needs write cache flushing and no additional
* allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x200
#define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_WRITE, "WRITE" }, \
{ XFS_BMAPI_DELAY, "DELAY" }, \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
{ XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
......@@ -113,21 +109,28 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
* Argument structure for xfs_bmap_alloc.
*/
typedef struct xfs_bmalloca {
xfs_fsblock_t firstblock; /* i/o first block allocated */
xfs_fsblock_t rval; /* starting block of new extent */
xfs_fileoff_t off; /* offset in file filling in */
xfs_fsblock_t *firstblock; /* i/o first block allocated */
struct xfs_bmap_free *flist; /* bmap freelist */
struct xfs_trans *tp; /* transaction pointer */
struct xfs_inode *ip; /* incore inode pointer */
struct xfs_bmbt_irec *prevp; /* extent before the new one */
struct xfs_bmbt_irec *gotp; /* extent after, or delayed */
xfs_extlen_t alen; /* i/o length asked/allocated */
struct xfs_bmbt_irec prev; /* extent before the new one */
struct xfs_bmbt_irec got; /* extent after, or delayed */
xfs_fileoff_t offset; /* offset in file filling in */
xfs_extlen_t length; /* i/o length asked/allocated */
xfs_fsblock_t blkno; /* starting block of new extent */
struct xfs_btree_cur *cur; /* btree cursor */
xfs_extnum_t idx; /* current extent index */
int nallocs;/* number of extents alloc'd */
int logflags;/* flags for transaction logging */
xfs_extlen_t total; /* total blocks needed for xaction */
xfs_extlen_t minlen; /* minimum allocation size (blocks) */
xfs_extlen_t minleft; /* amount must be left after alloc */
char eof; /* set if allocating past last extent */
char wasdel; /* replacing a delayed allocation */
char userdata;/* set if is user data */
char low; /* low on space, using seq'l ags */
char aeof; /* allocated space at eof */
char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
......@@ -152,251 +155,62 @@ typedef struct xfs_bmalloca {
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
/*
* Add bmap trace insert entries for all the contents of the extent list.
*
* Quite excessive tracing. Only do this for debug builds.
*/
#if defined(__KERNEL) && defined(DEBUG)
void
xfs_bmap_trace_exlist(
struct xfs_inode *ip, /* incore inode pointer */
xfs_extnum_t cnt, /* count of entries in list */
int whichfork,
unsigned long caller_ip); /* data or attr fork */
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
#else
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
/*
* Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked.
*/
int /* error code */
xfs_bmap_add_attrfork(
struct xfs_inode *ip, /* incore inode pointer */
int size, /* space needed for new attribute */
int rsvd); /* flag for reserved block allocation */
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
void
xfs_bmap_add_free(
xfs_fsblock_t bno, /* fs block number of extent */
xfs_filblks_t len, /* length of extent */
xfs_bmap_free_t *flist, /* list of extents */
struct xfs_mount *mp); /* mount point structure */
/*
* Routine to clean up the free list data structure when
* an error occurs during a transaction.
*/
void
xfs_bmap_cancel(
xfs_bmap_free_t *flist); /* free list to clean up */
/*
* Compute and fill in the value of the maximum depth of a bmap btree
* in this filesystem. Done once, during mount.
*/
void
xfs_bmap_compute_maxlevels(
struct xfs_mount *mp, /* file system mount structure */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the first unused block in the file.
* This is the lowest-address hole if the file has holes, else the first block
* past the end of file.
*/
int /* error */
xfs_bmap_first_unused(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_extlen_t len, /* size of hole to find */
xfs_fileoff_t *unused, /* unused block num */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the last block + 1 before
* last_block (input value) in the file.
* This is not based on i_size, it is based on the extent list.
* Returns 0 for local files, as they do not have an extent list.
*/
int /* error */
xfs_bmap_last_before(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *last_block, /* last block */
int whichfork); /* data or attr fork */
/*
* Returns the file-relative block number of the first block past eof in
* the file. This is not based on i_size, it is based on the extent list.
* Returns 0 for local files, as they do not have an extent list.
*/
int /* error */
xfs_bmap_last_offset(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *unused, /* last block num */
int whichfork); /* data or attr fork */
/*
* Returns whether the selected fork of the inode has exactly one
* block or not. For the data fork we check this matches di_size,
* implying the file's range is 0..bsize-1.
*/
int
xfs_bmap_one_block(
struct xfs_inode *ip, /* incore inode */
int whichfork); /* data or attr fork */
/*
* Read in the extents to iu_extents.
* All inode fields are set up by caller, we just traverse the btree
* and copy the records in.
*/
int /* error */
xfs_bmap_read_extents(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
int whichfork); /* data or attr fork */
/*
* Map file blocks to filesystem blocks.
* File range is given by the bno/len pair.
* Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
* into a hole or past eof.
* Only allocates blocks from a single allocation group,
* to avoid locking problems.
* The returned value in "firstblock" from the first call in a transaction
* must be remembered and presented to subsequent calls in "firstblock".
* An upper bound for the number of blocks to be allocated is supplied to
* the first call in "total"; if no allocation group has that many free
* blocks then the call will fail (return NULLFSBLOCK in "firstblock").
*/
int /* error */
xfs_bmapi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting file offs. mapped */
xfs_filblks_t len, /* length to map in file */
int flags, /* XFS_BMAPI_... */
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_extlen_t total, /* total blocks needed */
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap, /* i/o: mval size/count */
xfs_bmap_free_t *flist); /* i/o: list extents to free */
/*
* Map file blocks to filesystem blocks, simple version.
* One block only, read-only.
* For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
* For the other flag values, the effect is as if XFS_BMAPI_METADATA
* was set and all the others were clear.
*/
int /* error */
xfs_bmapi_single(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
int whichfork, /* data or attr fork */
xfs_fsblock_t *fsb, /* output: mapped block */
xfs_fileoff_t bno); /* starting file offs. mapped */
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
* that value. If not all extents in the block range can be removed then
* *done is set.
*/
int /* error */
xfs_bunmapi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting offset to unmap */
xfs_filblks_t len, /* length to unmap in file */
int flags, /* XFS_BMAPI_... */
xfs_extnum_t nexts, /* number of extents max */
xfs_fsblock_t *firstblock, /* first allocated block
controls a.g. for allocs */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
int *done); /* set if not done yet */
/*
* Check an extent list, which has just been read, for
* any bit in the extent flag field.
*/
int
xfs_check_nostate_extents(
struct xfs_ifork *ifp,
xfs_extnum_t idx,
xfs_extnum_t num);
uint
xfs_default_attroffset(
struct xfs_inode *ip);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
struct xfs_bmap_free *flist, struct xfs_mount *mp);
void xfs_bmap_cancel(struct xfs_bmap_free *flist);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *last_block, int whichfork);
int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t *unused, int whichfork);
int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork);
int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
xfs_filblks_t len, struct xfs_bmbt_irec *mval,
int *nmap, int flags);
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_fsblock_t *firstblock, xfs_extlen_t total,
struct xfs_bmbt_irec *mval, int *nmap,
struct xfs_bmap_free *flist);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist, int *done);
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
#ifdef __KERNEL__
/*
* Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
* caller. Frees all the extents that need freeing, which must be done
* last due to locking considerations.
*
* Return 1 if the given transaction was committed and a new one allocated,
* and 0 otherwise.
*/
int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
int *committed); /* xact committed or not */
/* bmap to userspace formatter - copy to user & advance pointer */
typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
/*
* Get inode's extents as described in bmv, and format for output.
*/
int /* error code */
xfs_getbmap(
xfs_inode_t *ip,
struct getbmapx *bmv, /* user bmap structure */
xfs_bmap_format_t formatter, /* format to user */
void *arg); /* formatter arg */
/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary
*/
int
xfs_bmap_eof(
struct xfs_inode *ip,
xfs_fileoff_t endoff,
int whichfork,
int *eof);
/*
* Count fsblocks of the given fork.
*/
int
xfs_bmap_count_blocks(
xfs_trans_t *tp,
struct xfs_inode *ip,
int whichfork,
int *count);
int
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
xfs_fileoff_t start_fsb,
xfs_fileoff_t length);
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
xfs_bmap_format_t formatter, void *arg);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, int *count);
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
......@@ -631,7 +631,7 @@ xfs_btree_read_bufl(
}
ASSERT(!xfs_buf_geterror(bp));
if (bp)
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
xfs_buf_set_ref(bp, refval);
*bpp = bp;
return 0;
}
......@@ -939,13 +939,13 @@ xfs_btree_set_refs(
switch (cur->bc_btnum) {
case XFS_BTNUM_BNO:
case XFS_BTNUM_CNT:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
break;
default:
ASSERT(0);
......@@ -970,7 +970,8 @@ xfs_btree_get_buf_block(
*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
mp->m_bsize, flags);
ASSERT(!xfs_buf_geterror(*bpp));
if (!*bpp)
return ENOMEM;
*block = XFS_BUF_TO_BLOCK(*bpp);
return 0;
......
......@@ -43,7 +43,6 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
......@@ -66,10 +65,6 @@ struct workqueue_struct *xfsconvertd_workqueue;
#define xb_to_km(flags) \
(((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
#define xfs_buf_allocate(flags) \
kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
#define xfs_buf_deallocate(bp) \
kmem_zone_free(xfs_buf_zone, (bp));
static inline int
xfs_buf_is_vmapped(
......@@ -152,6 +147,7 @@ xfs_buf_stale(
struct xfs_buf *bp)
{
bp->b_flags |= XBF_STALE;
xfs_buf_delwri_dequeue(bp);
atomic_set(&(bp)->b_lru_ref, 0);
if (!list_empty(&bp->b_lru)) {
struct xfs_buftarg *btp = bp->b_target;
......@@ -167,14 +163,19 @@ xfs_buf_stale(
ASSERT(atomic_read(&bp->b_hold) >= 1);
}
STATIC void
_xfs_buf_initialize(
xfs_buf_t *bp,
xfs_buftarg_t *target,
struct xfs_buf *
xfs_buf_alloc(
struct xfs_buftarg *target,
xfs_off_t range_base,
size_t range_length,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
bp = kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags));
if (unlikely(!bp))
return NULL;
/*
* We don't want certain flags to appear in b_flags.
*/
......@@ -203,8 +204,9 @@ _xfs_buf_initialize(
init_waitqueue_head(&bp->b_waiters);
XFS_STATS_INC(xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
return bp;
}
/*
......@@ -277,7 +279,7 @@ xfs_buf_free(
} else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
xfs_buf_deallocate(bp);
kmem_zone_free(xfs_buf_zone, bp);
}
/*
......@@ -416,10 +418,7 @@ _xfs_buf_map_pages(
/*
* Look up, and creates if absent, a lockable buffer for
* a given range of an inode. The buffer is returned
* locked. If other overlapping buffers exist, they are
* released before the new buffer is created and locked,
* which may imply that this call will block until those buffers
* are unlocked. No I/O is implied by this call.
* locked. No I/O is implied by this call.
*/
xfs_buf_t *
_xfs_buf_find(
......@@ -481,8 +480,6 @@ _xfs_buf_find(
/* No match found */
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
rb_link_node(&new_bp->b_rbnode, parent, rbp);
rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
/* the buffer keeps the perag reference until it is freed */
......@@ -525,35 +522,51 @@ _xfs_buf_find(
}
/*
* Assembles a buffer covering the specified range.
* Storage in memory for all portions of the buffer will be allocated,
* although backing storage may not be.
* Assembles a buffer covering the specified range. The code is optimised for
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
* more hits than misses.
*/
xfs_buf_t *
struct xfs_buf *
xfs_buf_get(
xfs_buftarg_t *target,/* target for buffer */
xfs_off_t ioff, /* starting offset of range */
size_t isize, /* length of range */
xfs_buf_flags_t flags)
{
xfs_buf_t *bp, *new_bp;
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
new_bp = xfs_buf_allocate(flags);
bp = _xfs_buf_find(target, ioff, isize, flags, NULL);
if (likely(bp))
goto found;
new_bp = xfs_buf_alloc(target, ioff << BBSHIFT, isize << BBSHIFT,
flags);
if (unlikely(!new_bp))
return NULL;
bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
if (!bp) {
kmem_zone_free(xfs_buf_zone, new_bp);
return NULL;
}
if (bp == new_bp) {
error = xfs_buf_allocate_memory(bp, flags);
if (error)
goto no_buffer;
} else {
xfs_buf_deallocate(new_bp);
if (unlikely(bp == NULL))
return NULL;
}
} else
kmem_zone_free(xfs_buf_zone, new_bp);
/*
* Now we have a workable buffer, fill in the block number so
* that we can do IO on it.
*/
bp->b_bn = ioff;
bp->b_count_desired = bp->b_buffer_length;
found:
if (!(bp->b_flags & XBF_MAPPED)) {
error = _xfs_buf_map_pages(bp, flags);
if (unlikely(error)) {
......@@ -564,18 +577,10 @@ xfs_buf_get(
}
XFS_STATS_INC(xb_get);
/*
* Always fill in the block number now, the mapped cases can do
* their own overlay of this later.
*/
bp->b_bn = ioff;
bp->b_count_desired = bp->b_buffer_length;
trace_xfs_buf_get(bp, flags, _RET_IP_);
return bp;
no_buffer:
no_buffer:
if (flags & (XBF_LOCK | XBF_TRYLOCK))
xfs_buf_unlock(bp);
xfs_buf_rele(bp);
......@@ -689,19 +694,6 @@ xfs_buf_read_uncached(
return bp;
}
xfs_buf_t *
xfs_buf_get_empty(
size_t len,
xfs_buftarg_t *target)
{
xfs_buf_t *bp;
bp = xfs_buf_allocate(0);
if (bp)
_xfs_buf_initialize(bp, target, 0, len, 0);
return bp;
}
/*
* Return a buffer allocated as an empty buffer and associated to external
* memory via xfs_buf_associate_memory() back to it's empty state.
......@@ -787,10 +779,9 @@ xfs_buf_get_uncached(
int error, i;
xfs_buf_t *bp;
bp = xfs_buf_allocate(0);
bp = xfs_buf_alloc(target, 0, len, 0);
if (unlikely(bp == NULL))
goto fail;
_xfs_buf_initialize(bp, target, 0, len, 0);
error = _xfs_buf_get_pages(bp, page_count, 0);
if (error)
......@@ -818,7 +809,7 @@ xfs_buf_get_uncached(
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
xfs_buf_deallocate(bp);
kmem_zone_free(xfs_buf_zone, bp);
fail:
return NULL;
}
......@@ -937,12 +928,6 @@ void
xfs_buf_unlock(
struct xfs_buf *bp)
{
if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
atomic_inc(&bp->b_hold);
bp->b_flags |= XBF_ASYNC;
xfs_buf_delwri_queue(bp, 0);
}
XB_CLEAR_OWNER(bp);
up(&bp->b_sema);
......@@ -1019,9 +1004,19 @@ xfs_buf_ioerror(
trace_xfs_buf_ioerror(bp, error, _RET_IP_);
}
void
xfs_buf_ioerror_alert(
struct xfs_buf *bp,
const char *func)
{
xfs_alert(bp->b_target->bt_mount,
"metadata I/O error: block 0x%llx (\"%s\") error %d buf count %zd",
(__uint64_t)XFS_BUF_ADDR(bp), func,
bp->b_error, XFS_BUF_COUNT(bp));
}
int
xfs_bwrite(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
int error;
......@@ -1033,25 +1028,13 @@ xfs_bwrite(
xfs_bdstrat_cb(bp);
error = xfs_buf_iowait(bp);
if (error)
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
if (error) {
xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR);
}
return error;
}
void
xfs_bdwrite(
void *mp,
struct xfs_buf *bp)
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
xfs_buf_delwri_queue(bp, 1);
}
/*
* Called when we want to stop a buffer from getting written or read.
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend
......@@ -1074,9 +1057,8 @@ xfs_bioerror(
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
......@@ -1103,9 +1085,8 @@ xfs_bioerror_relse(
* change that interface.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
bp->b_iodone = NULL;
if (!(fl & XBF_ASYNC)) {
/*
......@@ -1115,7 +1096,7 @@ xfs_bioerror_relse(
* ASYNC buffers.
*/
xfs_buf_ioerror(bp, EIO);
XFS_BUF_FINISH_IOWAIT(bp);
complete(&bp->b_iowait);
} else {
xfs_buf_relse(bp);
}
......@@ -1275,15 +1256,10 @@ xfs_buf_iorequest(
{
trace_xfs_buf_iorequest(bp, _RET_IP_);
if (bp->b_flags & XBF_DELWRI) {
xfs_buf_delwri_queue(bp, 1);
return 0;
}
ASSERT(!(bp->b_flags & XBF_DELWRI));
if (bp->b_flags & XBF_WRITE) {
if (bp->b_flags & XBF_WRITE)
xfs_buf_wait_unpin(bp);
}
xfs_buf_hold(bp);
/* Set the count to 1 initially, this will stop an I/O
......@@ -1481,9 +1457,13 @@ xfs_setsize_buftarg_flags(
btp->bt_smask = sectorsize - 1;
if (set_blocksize(btp->bt_bdev, sectorsize)) {
char name[BDEVNAME_SIZE];
bdevname(btp->bt_bdev, name);
xfs_warn(btp->bt_mount,
"Cannot set_blocksize to %u on device %s\n",
sectorsize, xfs_buf_target_name(btp));
sectorsize, name);
return EINVAL;
}
......@@ -1514,12 +1494,12 @@ xfs_setsize_buftarg(
}
STATIC int
xfs_alloc_delwrite_queue(
xfs_alloc_delwri_queue(
xfs_buftarg_t *btp,
const char *fsname)
{
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
INIT_LIST_HEAD(&btp->bt_delwri_queue);
spin_lock_init(&btp->bt_delwri_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
if (IS_ERR(btp->bt_task))
......@@ -1549,7 +1529,7 @@ xfs_alloc_buftarg(
spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
if (xfs_alloc_delwri_queue(btp, fsname))
goto error;
btp->bt_shrinker.shrink = xfs_buftarg_shrink;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
......@@ -1565,56 +1545,48 @@ xfs_alloc_buftarg(
/*
* Delayed write buffer handling
*/
STATIC void
void
xfs_buf_delwri_queue(
xfs_buf_t *bp,
int unlock)
xfs_buf_t *bp)
{
struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
struct xfs_buftarg *btp = bp->b_target;
trace_xfs_buf_delwri_queue(bp, _RET_IP_);
ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
ASSERT(!(bp->b_flags & XBF_READ));
spin_lock(dwlk);
/* If already in the queue, dequeue and place at tail */
spin_lock(&btp->bt_delwri_lock);
if (!list_empty(&bp->b_list)) {
/* if already in the queue, move it to the tail */
ASSERT(bp->b_flags & _XBF_DELWRI_Q);
if (unlock)
atomic_dec(&bp->b_hold);
list_del(&bp->b_list);
}
if (list_empty(dwq)) {
list_move_tail(&bp->b_list, &btp->bt_delwri_queue);
} else {
/* start xfsbufd as it is about to have something to do */
wake_up_process(bp->b_target->bt_task);
}
if (list_empty(&btp->bt_delwri_queue))
wake_up_process(bp->b_target->bt_task);
bp->b_flags |= _XBF_DELWRI_Q;
list_add_tail(&bp->b_list, dwq);
atomic_inc(&bp->b_hold);
bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC;
list_add_tail(&bp->b_list, &btp->bt_delwri_queue);
}
bp->b_queuetime = jiffies;
spin_unlock(dwlk);
if (unlock)
xfs_buf_unlock(bp);
spin_unlock(&btp->bt_delwri_lock);
}
void
xfs_buf_delwri_dequeue(
xfs_buf_t *bp)
{
spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
int dequeued = 0;
spin_lock(dwlk);
spin_lock(&bp->b_target->bt_delwri_lock);
if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
ASSERT(bp->b_flags & _XBF_DELWRI_Q);
list_del_init(&bp->b_list);
dequeued = 1;
}
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
spin_unlock(dwlk);
spin_unlock(&bp->b_target->bt_delwri_lock);
if (dequeued)
xfs_buf_rele(bp);
......@@ -1646,16 +1618,9 @@ xfs_buf_delwri_promote(
if (bp->b_queuetime < jiffies - age)
return;
bp->b_queuetime = jiffies - age;
spin_lock(&btp->bt_delwrite_lock);
list_move(&bp->b_list, &btp->bt_delwrite_queue);
spin_unlock(&btp->bt_delwrite_lock);
}
STATIC void
xfs_buf_runall_queues(
struct workqueue_struct *queue)
{
flush_workqueue(queue);
spin_lock(&btp->bt_delwri_lock);
list_move(&bp->b_list, &btp->bt_delwri_queue);
spin_unlock(&btp->bt_delwri_lock);
}
/*
......@@ -1669,15 +1634,13 @@ xfs_buf_delwri_split(
unsigned long age)
{
xfs_buf_t *bp, *n;
struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock;
int skipped = 0;
int force;
force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
INIT_LIST_HEAD(list);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
spin_lock(&target->bt_delwri_lock);
list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) {
ASSERT(bp->b_flags & XBF_DELWRI);
if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
......@@ -1694,10 +1657,9 @@ xfs_buf_delwri_split(
} else
skipped++;
}
spin_unlock(dwlk);
spin_unlock(&target->bt_delwri_lock);
return skipped;
}
/*
......@@ -1747,7 +1709,7 @@ xfsbufd(
}
/* sleep for a long time if there is nothing to do. */
if (list_empty(&target->bt_delwrite_queue))
if (list_empty(&target->bt_delwri_queue))
tout = MAX_SCHEDULE_TIMEOUT;
schedule_timeout_interruptible(tout);
......@@ -1783,9 +1745,7 @@ xfs_flush_buftarg(
LIST_HEAD(wait_list);
struct blk_plug plug;
xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
flush_workqueue(xfslogd_workqueue);
set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
......@@ -1866,11 +1826,3 @@ xfs_buf_terminate(void)
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);
}
#ifdef CONFIG_KDB_MODULES
struct list_head *
xfs_get_buftarg_list(void)
{
return &xfs_buftarg_list;
}
#endif
......@@ -105,8 +105,8 @@ typedef struct xfs_buftarg {
/* per device delwri queue */
struct task_struct *bt_task;
struct list_head bt_delwrite_queue;
spinlock_t bt_delwrite_lock;
struct list_head bt_delwri_queue;
spinlock_t bt_delwri_lock;
unsigned long bt_flags;
/* LRU control structures */
......@@ -175,7 +175,8 @@ extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
......@@ -197,14 +198,14 @@ extern void xfs_buf_unlock(xfs_buf_t *);
((bp)->b_sema.count <= 0)
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
extern int xfs_bwrite(struct xfs_buf *bp);
extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
extern int xfs_bdstrat_cb(struct xfs_buf *);
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
extern int xfs_buf_iorequest(xfs_buf_t *);
extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
......@@ -221,38 +222,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
extern void xfs_buf_delwri_promote(xfs_buf_t *);
extern void xfs_buf_delwri_queue(struct xfs_buf *);
extern void xfs_buf_delwri_dequeue(struct xfs_buf *);
extern void xfs_buf_delwri_promote(struct xfs_buf *);
/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
extern void xfs_buf_terminate(void);
static inline const char *
xfs_buf_target_name(struct xfs_buftarg *target)
{
static char __b[BDEVNAME_SIZE];
return bdevname(target->bt_bdev, __b);
}
#define XFS_BUF_ZEROFLAGS(bp) \
((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
#define XFS_BUF_SUPER_STALE(bp) do { \
XFS_BUF_STALE(bp); \
xfs_buf_delwri_dequeue(bp); \
XFS_BUF_DONE(bp); \
} while (0)
#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
......@@ -280,23 +265,16 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
static inline void
xfs_buf_set_ref(
struct xfs_buf *bp,
int lru_ref)
static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
atomic_set(&bp->b_lru_ref, lru_ref);
}
#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
static inline int xfs_buf_ispinned(struct xfs_buf *bp)
{
return atomic_read(&bp->b_pin_count);
}
#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
static inline void xfs_buf_relse(xfs_buf_t *bp)
{
xfs_buf_unlock(bp);
......@@ -313,14 +291,7 @@ extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
#ifdef CONFIG_KDB_MODULES
extern struct list_head *xfs_get_buftarg_list(void);
#endif
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1)
#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
#endif /* __XFS_BUF_H__ */
......@@ -967,7 +967,8 @@ xfs_buf_iodone_callbacks(
* I/O errors, there's no point in giving this a retry.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
XFS_BUF_SUPER_STALE(bp);
xfs_buf_stale(bp);
XFS_BUF_DONE(bp);
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
......@@ -975,9 +976,7 @@ xfs_buf_iodone_callbacks(
if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
lasttime = jiffies;
xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
xfs_buf_target_name(bp->b_target),
(__uint64_t)XFS_BUF_ADDR(bp));
xfs_buf_ioerror_alert(bp, __func__);
}
lasttarg = bp->b_target;
......@@ -993,7 +992,7 @@ xfs_buf_iodone_callbacks(
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
if (!XFS_BUF_ISSTALE(bp)) {
XFS_BUF_DELAYWRITE(bp);
xfs_buf_delwri_queue(bp);
XFS_BUF_DONE(bp);
}
ASSERT(bp->b_iodone != NULL);
......@@ -1006,9 +1005,8 @@ xfs_buf_iodone_callbacks(
* If the write of the buffer was synchronous, we want to make
* sure to return the error to the caller of xfs_bwrite().
*/
XFS_BUF_STALE(bp);
xfs_buf_stale(bp);
XFS_BUF_DONE(bp);
XFS_BUF_UNDELAYWRITE(bp);
trace_xfs_buf_error_relse(bp, _RET_IP_);
......
......@@ -1578,9 +1578,8 @@ xfs_da_grow_inode_int(
*/
nmap = 1;
ASSERT(args->firstblock != NULL);
error = xfs_bmapi(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
XFS_BMAPI_CONTIG,
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (error)
......@@ -1602,9 +1601,8 @@ xfs_da_grow_inode_int(
for (b = *bno, mapi = 0; b < *bno + count; ) {
nmap = MIN(XFS_BMAP_MAX_NMAP, count);
c = (int)(*bno + count - b);
error = xfs_bmapi(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
XFS_BMAPI_METADATA,
error = xfs_bmapi_write(tp, dp, b, c,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
args->firstblock, args->total,
&mapp[mapi], &nmap, args->flist);
if (error)
......@@ -1975,33 +1973,16 @@ xfs_da_do_buf(
/*
* Optimize the one-block case.
*/
if (nfsb == 1) {
xfs_fsblock_t fsb;
if ((error =
xfs_bmapi_single(trans, dp, whichfork, &fsb,
(xfs_fileoff_t)bno))) {
return error;
}
if (nfsb == 1)
mapp = &map;
if (fsb == NULLFSBLOCK) {
nmap = 0;
} else {
map.br_startblock = fsb;
map.br_startoff = (xfs_fileoff_t)bno;
map.br_blockcount = 1;
nmap = 1;
}
} else {
else
mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
nmap = nfsb;
if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
nfsb,
XFS_BMAPI_METADATA |
xfs_bmapi_aflag(whichfork),
NULL, 0, mapp, &nmap, NULL)))
goto exit0;
}
nmap = nfsb;
error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
&nmap, xfs_bmapi_aflag(whichfork));
if (error)
goto exit0;
} else {
map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
map.br_startoff = (xfs_fileoff_t)bno;
......@@ -2072,13 +2053,10 @@ xfs_da_do_buf(
if (!bp)
continue;
if (caller == 1) {
if (whichfork == XFS_ATTR_FORK) {
XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
XFS_ATTR_BTREE_REF);
} else {
XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
XFS_DIR_BTREE_REF);
}
if (whichfork == XFS_ATTR_FORK)
xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
else
xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
}
if (bplist) {
bplist[nbplist++] = bp;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册