提交 abbf9e8a 编写于 作者: C Christoph Hellwig 提交者: Darrick J. Wong

xfs: rewrite getbmap using the xfs_iext_* helpers

Currently getbmap uses xfs_bmapi_read to query the extent map, and then
fixes up various bits that are eventually reported to userspace.

This patch instead rewrites it to use xfs_iext_lookup_extent and
xfs_iext_get_extent to iteratively process the extent map.  This not
only avoids the need to allocate a map for the returned xfs_bmbt_irec
structures but also greatly simplified the code.

There are two intentional behavior changes compared to the old code:

 - the current code reports unwritten extents that don't directly border
   a written one as unwritten even when not passing the BMV_IF_PREALLOC
   option, contrary to the documentation.  The new code requires the
   BMV_IF_PREALLOC flag to report the unwrittent extent bit.
 - The new code does never merges consecutive extents, unlike the old
   code that sometimes does it based on the boundaries of the
   xfs_bmapi_read calls.  Note that the extent merging behavior was
   entirely undocumented.
Signed-off-by: NChristoph Hellwig <hch@lst.de>
Reviewed-by: NDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: NDarrick J. Wong <darrick.wong@oracle.com>
上级 15f859ae
......@@ -405,125 +405,103 @@ xfs_bmap_count_blocks(
return 0;
}
/*
* returns 1 for success, 0 if we failed to map the extent.
*/
STATIC int
xfs_getbmapx_fix_eof_hole(
xfs_inode_t *ip, /* xfs incore inode pointer */
int whichfork,
struct getbmapx *out, /* output structure */
int prealloced, /* this is a file with
* preallocated data space */
int64_t end, /* last block requested */
xfs_fsblock_t startblock,
bool moretocome)
static int
xfs_getbmap_report_one(
struct xfs_inode *ip,
struct getbmapx *bmv,
struct getbmapx *out,
int64_t bmv_end,
struct xfs_bmbt_irec *got)
{
int64_t fixlen;
xfs_mount_t *mp; /* file system mount point */
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_extnum_t lastx; /* last extent pointer */
xfs_fileoff_t fileblock;
if (startblock == HOLESTARTBLOCK) {
mp = ip->i_mount;
out->bmv_block = -1;
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
fixlen -= out->bmv_offset;
if (prealloced && out->bmv_offset + out->bmv_length == end) {
/* Came to hole at EOF. Trim it. */
if (fixlen <= 0)
return 0;
out->bmv_length = fixlen;
}
struct getbmapx *p = out + bmv->bmv_entries;
bool shared = false, trimmed = false;
int error;
error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
if (error)
return error;
if (isnullstartblock(got->br_startblock) ||
got->br_startblock == DELAYSTARTBLOCK) {
/*
* Delalloc extents that start beyond EOF can occur due to
* speculative EOF allocation when the delalloc extent is larger
* than the largest freespace extent at conversion time. These
* extents cannot be converted by data writeback, so can exist
* here even if we are not supposed to be finding delalloc
* extents.
*/
if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);
p->bmv_oflags |= BMV_OF_DELALLOC;
p->bmv_block = -2;
} else {
if (startblock == DELAYSTARTBLOCK)
out->bmv_block = -2;
else
out->bmv_block = xfs_fsb_to_db(ip, startblock);
fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (!moretocome &&
xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
(lastx == xfs_iext_count(ifp) - 1))
out->bmv_oflags |= BMV_OF_LAST;
p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
}
return 1;
if (got->br_state == XFS_EXT_UNWRITTEN &&
(bmv->bmv_iflags & BMV_IF_PREALLOC))
p->bmv_oflags |= BMV_OF_PREALLOC;
if (shared)
p->bmv_oflags |= BMV_OF_SHARED;
p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);
bmv->bmv_offset = p->bmv_offset + p->bmv_length;
bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
bmv->bmv_entries++;
return 0;
}
/* Adjust the reported bmap around shared/unshared extent transitions. */
STATIC int
xfs_getbmap_adjust_shared(
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *map,
struct getbmapx *out,
struct xfs_bmbt_irec *next_map)
static void
xfs_getbmap_report_hole(
struct xfs_inode *ip,
struct getbmapx *bmv,
struct getbmapx *out,
int64_t bmv_end,
xfs_fileoff_t bno,
xfs_fileoff_t end)
{
struct xfs_mount *mp = ip->i_mount;
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_agblock_t ebno;
xfs_extlen_t elen;
xfs_extlen_t nlen;
int error;
struct getbmapx *p = out + bmv->bmv_entries;
next_map->br_startblock = NULLFSBLOCK;
next_map->br_startoff = NULLFILEOFF;
next_map->br_blockcount = 0;
if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
return;
/* Only written data blocks can be shared. */
if (!xfs_is_reflink_inode(ip) ||
whichfork != XFS_DATA_FORK ||
!xfs_bmap_is_real_extent(map))
return 0;
p->bmv_block = -1;
p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);
agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
map->br_blockcount, &ebno, &elen, true);
if (error)
return error;
bmv->bmv_offset = p->bmv_offset + p->bmv_length;
bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
bmv->bmv_entries++;
}
if (ebno == NULLAGBLOCK) {
/* No shared blocks at all. */
return 0;
} else if (agbno == ebno) {
/*
* Shared extent at (agbno, elen). Shrink the reported
* extent length and prepare to move the start of map[i]
* to agbno+elen, with the aim of (re)formatting the new
* map[i] the next time through the inner loop.
*/
out->bmv_length = XFS_FSB_TO_BB(mp, elen);
out->bmv_oflags |= BMV_OF_SHARED;
if (elen != map->br_blockcount) {
*next_map = *map;
next_map->br_startblock += elen;
next_map->br_startoff += elen;
next_map->br_blockcount -= elen;
}
map->br_blockcount -= elen;
} else {
/*
* There's an unshared extent (agbno, ebno - agbno)
* followed by shared extent at (ebno, elen). Shrink
* the reported extent length to cover only the unshared
* extent and prepare to move up the start of map[i] to
* ebno, with the aim of (re)formatting the new map[i]
* the next time through the inner loop.
*/
*next_map = *map;
nlen = ebno - agbno;
out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
next_map->br_startblock += nlen;
next_map->br_startoff += nlen;
next_map->br_blockcount -= nlen;
map->br_blockcount -= nlen;
}
static inline bool
xfs_getbmap_full(
struct getbmapx *bmv)
{
return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}
return 0;
static bool
xfs_getbmap_next_rec(
struct xfs_bmbt_irec *rec,
xfs_fileoff_t total_end)
{
xfs_fileoff_t end = rec->br_startoff + rec->br_blockcount;
if (end == total_end)
return false;
rec->br_startoff += rec->br_blockcount;
if (!isnullstartblock(rec->br_startblock) &&
rec->br_startblock != DELAYSTARTBLOCK)
rec->br_startblock += rec->br_blockcount;
rec->br_blockcount = total_end - end;
return true;
}
/*
......@@ -540,119 +518,72 @@ xfs_getbmap(
xfs_bmap_format_t formatter, /* format to user */
void *arg) /* formatter arg */
{
int64_t bmvend; /* last block requested */
int error = 0; /* return value */
int64_t fixlen; /* length for -1 case */
int i; /* extent number */
int lock; /* lock state */
xfs_bmbt_irec_t *map; /* buffer for user's data */
xfs_mount_t *mp; /* file system mount point */
int nex; /* # of user extents can do */
int subnex; /* # of bmapi's can do */
int nmap; /* number of map entries */
struct getbmapx *out; /* output structure */
int whichfork; /* data or attr fork */
int prealloced; /* this is a file with
* preallocated data space */
int iflags; /* interface flags */
int bmapi_flags; /* flags for xfs_bmapi */
int cur_ext = 0;
struct xfs_bmbt_irec inject_map;
mp = ip->i_mount;
iflags = bmv->bmv_iflags;
struct xfs_mount *mp = ip->i_mount;
int iflags = bmv->bmv_iflags;
int whichfork, lock, i, error = 0;
int64_t bmv_end, max_len;
xfs_fileoff_t bno, first_bno;
struct xfs_ifork *ifp;
struct getbmapx *out;
struct xfs_bmbt_irec got, rec;
xfs_filblks_t len;
xfs_extnum_t idx;
#ifndef DEBUG
/* Only allow CoW fork queries if we're debugging. */
if (iflags & BMV_IF_COWFORK)
return -EINVAL;
#endif
if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
return -EINVAL;
if (bmv->bmv_count <= 1)
return -EINVAL;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
if (bmv->bmv_length < -1)
return -EINVAL;
bmv->bmv_entries = 0;
if (bmv->bmv_length == 0)
return 0;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
if (iflags & BMV_IF_ATTRFORK)
whichfork = XFS_ATTR_FORK;
else if (iflags & BMV_IF_COWFORK)
whichfork = XFS_COW_FORK;
else
whichfork = XFS_DATA_FORK;
ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) {
case XFS_ATTR_FORK:
if (XFS_IFORK_Q(ip)) {
if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
return -EINVAL;
} else if (unlikely(
ip->i_d.di_aformat != 0 &&
ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
ip->i_mount);
return -EFSCORRUPTED;
}
if (!XFS_IFORK_Q(ip))
goto out_unlock_iolock;
prealloced = 0;
fixlen = 1LL << 32;
max_len = 1LL << 32;
lock = xfs_ilock_attr_map_shared(ip);
break;
case XFS_COW_FORK:
if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
return -EINVAL;
/* No CoW fork? Just return */
if (!ifp)
goto out_unlock_iolock;
if (xfs_get_cowextsz_hint(ip)) {
prealloced = 1;
fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
break;
default:
/* Local format data forks report no extents. */
if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
bmv->bmv_entries = 0;
return 0;
}
if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
return -EINVAL;
if (xfs_get_cowextsz_hint(ip))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
prealloced = 1;
fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
}
lock = XFS_ILOCK_SHARED;
xfs_ilock(ip, lock);
break;
}
if (bmv->bmv_length == -1) {
fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
bmv->bmv_length =
max_t(int64_t, fixlen - bmv->bmv_offset, 0);
} else if (bmv->bmv_length == 0) {
bmv->bmv_entries = 0;
return 0;
} else if (bmv->bmv_length < 0) {
return -EINVAL;
}
nex = bmv->bmv_count - 1;
if (nex <= 0)
return -EINVAL;
bmvend = bmv->bmv_offset + bmv->bmv_length;
if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
return -ENOMEM;
out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
if (!out)
return -ENOMEM;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
switch (whichfork) {
case XFS_DATA_FORK:
if (!(iflags & BMV_IF_DELALLOC) &&
(ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
......@@ -670,147 +601,107 @@ xfs_getbmap(
*/
}
if (xfs_get_extsz_hint(ip) ||
(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
max_len = mp->m_super->s_maxbytes;
else
max_len = XFS_ISIZE(ip);
lock = xfs_ilock_data_map_shared(ip);
break;
case XFS_COW_FORK:
lock = XFS_ILOCK_SHARED;
xfs_ilock(ip, lock);
break;
case XFS_ATTR_FORK:
lock = xfs_ilock_attr_map_shared(ip);
break;
}
/*
* Don't let nex be bigger than the number of extents
* we can have assuming alternating holes and real extents.
*/
if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
bmapi_flags = xfs_bmapi_aflag(whichfork);
if (!(iflags & BMV_IF_PREALLOC))
bmapi_flags |= XFS_BMAPI_IGSTATE;
/*
* Allocate enough space to handle "subnex" maps at a time.
*/
error = -ENOMEM;
subnex = 16;
map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
if (!map)
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
break;
case XFS_DINODE_FMT_LOCAL:
/* Local format inode forks report no extents. */
goto out_unlock_ilock;
default:
error = -EINVAL;
goto out_unlock_ilock;
}
bmv->bmv_entries = 0;
if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
(whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
error = 0;
goto out_free_map;
if (bmv->bmv_length == -1) {
max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
}
do {
nmap = (nex> subnex) ? subnex : nex;
error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
XFS_BB_TO_FSB(mp, bmv->bmv_length),
map, &nmap, bmapi_flags);
if (error)
goto out_free_map;
ASSERT(nmap <= subnex);
for (i = 0; i < nmap && bmv->bmv_length &&
cur_ext < bmv->bmv_count - 1; i++) {
out[cur_ext].bmv_oflags = 0;
if (map[i].br_state == XFS_EXT_UNWRITTEN)
out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
else if (map[i].br_startblock == DELAYSTARTBLOCK)
out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
out[cur_ext].bmv_offset =
XFS_FSB_TO_BB(mp, map[i].br_startoff);
out[cur_ext].bmv_length =
XFS_FSB_TO_BB(mp, map[i].br_blockcount);
out[cur_ext].bmv_unused1 = 0;
out[cur_ext].bmv_unused2 = 0;
bmv_end = bmv->bmv_offset + bmv->bmv_length;
/*
* delayed allocation extents that start beyond EOF can
* occur due to speculative EOF allocation when the
* delalloc extent is larger than the largest freespace
* extent at conversion time. These extents cannot be
* converted by data writeback, so can exist here even
* if we are not supposed to be finding delalloc
* extents.
*/
if (map[i].br_startblock == DELAYSTARTBLOCK &&
map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
if (map[i].br_startblock == HOLESTARTBLOCK &&
whichfork == XFS_ATTR_FORK) {
/* came to the end of attribute fork */
out[cur_ext].bmv_oflags |= BMV_OF_LAST;
goto out_free_map;
}
first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
len = XFS_BB_TO_FSB(mp, bmv->bmv_length);
/* Is this a shared block? */
error = xfs_getbmap_adjust_shared(ip, whichfork,
&map[i], &out[cur_ext], &inject_map);
if (error)
goto out_free_map;
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(NULL, ip, whichfork);
if (error)
goto out_unlock_ilock;
}
if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
&out[cur_ext], prealloced, bmvend,
map[i].br_startblock,
inject_map.br_startblock != NULLFSBLOCK))
goto out_free_map;
if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
/*
* Report a whole-file hole if the delalloc flag is set to
* stay compatible with the old implementation.
*/
if (iflags & BMV_IF_DELALLOC)
xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
goto out_unlock_ilock;
}
bmv->bmv_offset =
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
max_t(int64_t, 0, bmvend - bmv->bmv_offset);
while (!xfs_getbmap_full(bmv)) {
xfs_trim_extent(&got, first_bno, len);
/*
* In case we don't want to return the hole,
* don't increase cur_ext so that we can reuse
* it in the next loop.
*/
if ((iflags & BMV_IF_NO_HOLES) &&
map[i].br_startblock == HOLESTARTBLOCK) {
memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
continue;
}
/*
* Report an entry for a hole if this extent doesn't directly
* follow the previous one.
*/
if (got.br_startoff > bno) {
xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
got.br_startoff);
if (xfs_getbmap_full(bmv))
break;
}
/*
* In order to report shared extents accurately,
* we report each distinct shared/unshared part
* of a single bmbt record using multiple bmap
* extents. To make that happen, we iterate the
* same map array item multiple times, each
* time trimming out the subextent that we just
* reported.
*
* Because of this, we must check the out array
* index (cur_ext) directly against bmv_count-1
* to avoid overflows.
*/
if (inject_map.br_startblock != NULLFSBLOCK) {
map[i] = inject_map;
i--;
/*
* In order to report shared extents accurately, we report each
* distinct shared / unshared part of a single bmbt record with
* an individual getbmapx record.
*/
bno = got.br_startoff + got.br_blockcount;
rec = got;
do {
error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
&rec);
if (error || xfs_getbmap_full(bmv))
goto out_unlock_ilock;
} while (xfs_getbmap_next_rec(&rec, bno));
if (!xfs_iext_get_extent(ifp, ++idx, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {
xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
bno, end);
}
bmv->bmv_entries++;
cur_ext++;
break;
}
} while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
out_free_map:
kmem_free(map);
out_unlock_ilock:
if (bno >= first_bno + len)
break;
}
out_unlock_ilock:
xfs_iunlock(ip, lock);
out_unlock_iolock:
out_unlock_iolock:
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
for (i = 0; i < cur_ext; i++) {
for (i = 0; i < bmv->bmv_entries; i++) {
/* format results & advance arg */
error = formatter(&arg, &out[i]);
if (error)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册