提交 e1d8fb88 编写于 作者: N Namjae Jeon 提交者: Dave Chinner

xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate

This patch implements fallocate's FALLOC_FL_COLLAPSE_RANGE for XFS.

The semantics of this flag are following:
1) It collapses the range lying between offset and length by removing any data
   blocks which are present in this range and than updates all the logical
   offsets of extents beyond "offset + len" to nullify the hole created by
   removing blocks. In short, it does not leave a hole.
2) It should be used exclusively. No other fallocate flag in combination.
3) Offset and length supplied to fallocate should be fs block size aligned
   in case of xfs and ext4.
4) Collaspe range does not work beyond i_size.
Signed-off-by: NNamjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: NAshish Sangwan <a.sangwan@samsung.com>
Reviewed-by: NDave Chinner <dchinner@redhat.com>
Signed-off-by: NDave Chinner <david@fromorbit.com>
上级 00f5e619
...@@ -5378,3 +5378,196 @@ xfs_bunmapi( ...@@ -5378,3 +5378,196 @@ xfs_bunmapi(
} }
return error; return error;
} }
/*
* Shift extent records to the left to cover a hole.
*
* The maximum number of extents to be shifted in a single operation
* is @num_exts, and @current_ext keeps track of the current extent
* index we have shifted. @offset_shift_fsb is the length by which each
* extent is shifted. If there is no hole to shift the extents
* into, this will be considered invalid operation and we abort immediately.
*/
int
xfs_bmap_shift_extents(
struct xfs_trans *tp,
struct xfs_inode *ip,
int *done,
xfs_fileoff_t start_fsb,
xfs_fileoff_t offset_shift_fsb,
xfs_extnum_t *current_ext,
xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist,
int num_exts)
{
struct xfs_btree_cur *cur;
struct xfs_bmbt_rec_host *gotp;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec left;
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp;
xfs_extnum_t nexts = 0;
xfs_fileoff_t startoff;
int error = 0;
int i;
int whichfork = XFS_DATA_FORK;
int logflags;
xfs_filblks_t blockcount = 0;
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
XFS_ERROR_REPORT("xfs_bmap_shift_extents",
XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
ASSERT(current_ext != NULL);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
/* Read in all the extents */
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
}
/*
* If *current_ext is 0, we would need to lookup the extent
* from where we would start shifting and store it in gotp.
*/
if (!*current_ext) {
gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
/*
* gotp can be null in 2 cases: 1) if there are no extents
* or 2) start_fsb lies in a hole beyond which there are
* no extents. Either way, we are done.
*/
if (!gotp) {
*done = 1;
return 0;
}
}
/* We are going to change core inode */
logflags = XFS_ILOG_CORE;
if (ifp->if_flags & XFS_IFBROOT) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
} else {
cur = NULL;
logflags |= XFS_ILOG_DEXT;
}
while (nexts++ < num_exts &&
*current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
gotp = xfs_iext_get_ext(ifp, *current_ext);
xfs_bmbt_get_all(gotp, &got);
startoff = got.br_startoff - offset_shift_fsb;
/*
* Before shifting extent into hole, make sure that the hole
* is large enough to accomodate the shift.
*/
if (*current_ext) {
xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
*current_ext - 1), &left);
if (startoff < left.br_startoff + left.br_blockcount)
error = XFS_ERROR(EINVAL);
} else if (offset_shift_fsb > got.br_startoff) {
/*
* When first extent is shifted, offset_shift_fsb
* should be less than the stating offset of
* the first extent.
*/
error = XFS_ERROR(EINVAL);
}
if (error)
goto del_cursor;
if (cur) {
error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
got.br_startblock,
got.br_blockcount,
&i);
if (error)
goto del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
}
/* Check if we can merge 2 adjacent extents */
if (*current_ext &&
left.br_startoff + left.br_blockcount == startoff &&
left.br_startblock + left.br_blockcount ==
got.br_startblock &&
left.br_state == got.br_state &&
left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
blockcount = left.br_blockcount +
got.br_blockcount;
xfs_iext_remove(ip, *current_ext, 1, 0);
if (cur) {
error = xfs_btree_delete(cur, &i);
if (error)
goto del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
}
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
gotp = xfs_iext_get_ext(ifp, --*current_ext);
xfs_bmbt_get_all(gotp, &got);
/* Make cursor point to the extent we will update */
if (cur) {
error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
got.br_startblock,
got.br_blockcount,
&i);
if (error)
goto del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
}
xfs_bmbt_set_blockcount(gotp, blockcount);
got.br_blockcount = blockcount;
} else {
/* We have to update the startoff */
xfs_bmbt_set_startoff(gotp, startoff);
got.br_startoff = startoff;
}
if (cur) {
error = xfs_bmbt_update(cur, got.br_startoff,
got.br_startblock,
got.br_blockcount,
got.br_state);
if (error)
goto del_cursor;
}
(*current_ext)++;
}
/* Check if we are done */
if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
*done = 1;
del_cursor:
if (cur)
xfs_btree_del_cursor(cur,
error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
xfs_trans_log_inode(tp, ip, logflags);
return error;
}
...@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) ...@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
{ BMAP_RIGHT_FILLING, "RF" }, \ { BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" } { BMAP_ATTRFORK, "ATTR" }
/*
* This macro is used to determine how many extents will be shifted
* in one write transaction. We could require two splits,
* an extent move on the first and an extent merge on the second,
* So it is proper that one extent is shifted inside write transaction
* at a time.
*/
#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
#ifdef DEBUG #ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip); int whichfork, unsigned long caller_ip);
...@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, ...@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num); xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip); uint xfs_default_attroffset(struct xfs_inode *ip);
int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
int *done, xfs_fileoff_t start_fsb,
xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
int num_exts);
#endif /* __XFS_BMAP_H__ */ #endif /* __XFS_BMAP_H__ */
...@@ -1349,7 +1349,6 @@ xfs_free_file_space( ...@@ -1349,7 +1349,6 @@ xfs_free_file_space(
* the freeing of the space succeeds at ENOSPC. * the freeing of the space succeeds at ENOSPC.
*/ */
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
/* /*
...@@ -1467,6 +1466,102 @@ xfs_zero_file_space( ...@@ -1467,6 +1466,102 @@ xfs_zero_file_space(
} }
/*
* xfs_collapse_file_space()
* This routine frees disk space and shift extent for the given file.
* The first thing we do is to free data blocks in the specified range
* by calling xfs_free_file_space(). It would also sync dirty data
* and invalidate page cache over the region on which collapse range
* is working. And Shift extent records to the left to cover a hole.
* RETURNS:
* 0 on success
* errno on error
*
*/
int
xfs_collapse_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t len)
{
int done = 0;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error;
xfs_extnum_t current_ext = 0;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
int committed;
xfs_fileoff_t start_fsb;
xfs_fileoff_t shift_fsb;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
trace_xfs_collapse_file_space(ip);
start_fsb = XFS_B_TO_FSB(mp, offset + len);
shift_fsb = XFS_B_TO_FSB(mp, len);
error = xfs_free_file_space(ip, offset, len);
if (error)
return error;
while (!error && !done) {
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
tp->t_flags |= XFS_TRANS_RESERVE;
/*
* We would need to reserve permanent block for transaction.
* This will come into picture when after shifting extent into
* hole we found that adjacent extents can be merged which
* may lead to freeing of a block during record update.
*/
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
ip->i_gdquot, ip->i_pdquot,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
XFS_QMOPT_RES_REGBLKS);
if (error)
goto out;
xfs_trans_ijoin(tp, ip, 0);
xfs_bmap_init(&free_list, &first_block);
/*
* We are using the write transaction in which max 2 bmbt
* updates are allowed
*/
error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
shift_fsb, &current_ext,
&first_block, &free_list,
XFS_BMAP_MAX_SHIFT_EXTENTS);
if (error)
goto out;
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto out;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
return error;
out:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
/* /*
* We need to check that the format of the data fork in the temporary inode is * We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with * valid for the target inode before doing the swap. This is not a problem with
......
...@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, ...@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len); xfs_off_t len);
int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len); xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */ /* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
......
...@@ -823,7 +823,8 @@ xfs_file_fallocate( ...@@ -823,7 +823,8 @@ xfs_file_fallocate(
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
return -EINVAL; return -EINVAL;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE))
return -EOPNOTSUPP; return -EOPNOTSUPP;
xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_IOLOCK_EXCL);
...@@ -831,6 +832,20 @@ xfs_file_fallocate( ...@@ -831,6 +832,20 @@ xfs_file_fallocate(
error = xfs_free_file_space(ip, offset, len); error = xfs_free_file_space(ip, offset, len);
if (error) if (error)
goto out_unlock; goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
goto out_unlock;
}
ASSERT(offset + len < i_size_read(inode));
new_size = i_size_read(inode) - len;
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else { } else {
if (!(mode & FALLOC_FL_KEEP_SIZE) && if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) { offset + len > i_size_read(inode)) {
...@@ -859,7 +874,7 @@ xfs_file_fallocate( ...@@ -859,7 +874,7 @@ xfs_file_fallocate(
if (ip->i_d.di_mode & S_IXGRP) if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID; ip->i_d.di_mode &= ~S_ISGID;
if (!(mode & FALLOC_FL_PUNCH_HOLE)) if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
......
...@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink); ...@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
DEFINE_INODE_EVENT(xfs_inactive_symlink); DEFINE_INODE_EVENT(xfs_inactive_symlink);
DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_alloc_file_space);
DEFINE_INODE_EVENT(xfs_free_file_space); DEFINE_INODE_EVENT(xfs_free_file_space);
DEFINE_INODE_EVENT(xfs_collapse_file_space);
DEFINE_INODE_EVENT(xfs_readdir); DEFINE_INODE_EVENT(xfs_readdir);
#ifdef CONFIG_XFS_POSIX_ACL #ifdef CONFIG_XFS_POSIX_ACL
DEFINE_INODE_EVENT(xfs_get_acl); DEFINE_INODE_EVENT(xfs_get_acl);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册