Commit ecd788a9 authored by Dave Chinner

xfs: rework xfs_alloc_vextent()

It's a multiplexing mess that can be greatly simplified, and really
needs to be simplified to allow active per-ag references to
propagate from the initial AG selection code to the bmapi code.

This splits the code out into a separate parameter checking
function, an iterator function, and allocation completion functions,
and then implements the individual policies using these functions.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Parent 76257a15
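The resulting structure is easiest to see in miniature before reading the diff. Below is a compile-and-run userspace sketch of the shape of the rework only; every name and type in it is a stand-in and the AG-level allocator is faked, so this is an editorial model, not the kernel code. Each policy body wraps the shared argument-checking and result-fixup stages around its own search, and xfs_alloc_vextent() itself shrinks to a dispatcher.

	#include <errno.h>
	#include <stdio.h>

	/* Stand-in for struct xfs_alloc_arg; the real structure is far richer. */
	enum alloc_type { THIS_AG, NEAR_BNO, THIS_BNO, START_BNO, FIRST_AG };

	struct alloc_args {
		enum alloc_type	type;
		long long	fsbno;		/* -1 models NULLFSBLOCK */
		int		agno;
	};

	/* Stage 1: shared checks (models xfs_alloc_vextent_check_args). */
	static int check_args(struct alloc_args *args)
	{
		if (args->agno < 0) {		/* a stand-in bounds check */
			args->fsbno = -1;
			return -ENOSPC;		/* bad args reported as ENOSPC */
		}
		return 0;
	}

	/* Stage 3: shared result fixup (models xfs_alloc_vextent_set_fsbno). */
	static void set_fsbno(struct alloc_args *args, int ok)
	{
		args->fsbno = ok ? (long long)args->agno << 32 : -1;
	}

	/* Stage 2: one body per policy, wrapping stages 1 and 3 around its search. */
	static int alloc_this_ag(struct alloc_args *args)
	{
		int error = check_args(args);

		if (error)
			return error == -ENOSPC ? 0 : error;
		set_fsbno(args, 1);	/* pretend the single-AG search succeeded */
		return 0;
	}

	/* The old multiplexing switch survives only as a thin dispatcher. */
	static int alloc_vextent(struct alloc_args *args)
	{
		switch (args->type) {
		case THIS_AG:
		case NEAR_BNO:
		case THIS_BNO:
			return alloc_this_ag(args);
		default:	/* START_BNO/FIRST_AG would dispatch similarly */
			return -EINVAL;
		}
	}

	int main(void)
	{
		struct alloc_args args = { .type = THIS_AG, .agno = 3 };

		printf("error=%d fsbno=0x%llx\n", alloc_vextent(&args), args.fsbno);
		return 0;
	}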
@@ -3151,29 +3151,20 @@ xfs_alloc_read_agf(
 }
 
 /*
- * Allocate an extent (variable-size).
- * Depending on the allocation type, we either look in a single allocation
- * group or loop over the allocation groups to find the result.
+ * Pre-process allocation arguments to set initial state that we don't require
+ * callers to set up correctly, as well as bounds check the allocation args
+ * that are set up.
  */
-int				/* error */
-xfs_alloc_vextent(
-	struct xfs_alloc_arg	*args)	/* allocation argument structure */
+static int
+xfs_alloc_vextent_check_args(
+	struct xfs_alloc_arg	*args)
 {
-	xfs_agblock_t		agsize;	/* allocation group size */
-	int			error;
-	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
-	struct xfs_mount	*mp;	/* mount structure pointer */
-	xfs_agnumber_t		sagno;	/* starting allocation group number */
-	xfs_alloctype_t		type;	/* input allocation type */
-	int			bump_rotor = 0;
-	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */
-	xfs_agnumber_t		minimum_agno = 0;
+	struct xfs_mount	*mp = args->mp;
+	xfs_agblock_t		agsize;
 
-	mp = args->mp;
-	type = args->otype = args->type;
+	args->otype = args->type;
 	args->agbno = NULLAGBLOCK;
 
-	if (args->tp->t_highest_agno != NULLAGNUMBER)
-		minimum_agno = args->tp->t_highest_agno;
-
 	/*
 	 * Just fix this up, for the case where the last a.g. is shorter
 	 * (or there's only one a.g.) and the caller couldn't easily figure
@@ -3195,199 +3186,314 @@ xfs_alloc_vextent(
 	    args->mod >= args->prod) {
 		args->fsbno = NULLFSBLOCK;
 		trace_xfs_alloc_vextent_badargs(args);
-		return 0;
-	}
-
-	switch (type) {
-	case XFS_ALLOCTYPE_THIS_AG:
-	case XFS_ALLOCTYPE_NEAR_BNO:
-	case XFS_ALLOCTYPE_THIS_BNO:
-		/*
-		 * These three force us into a single a.g.
-		 */
-		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-		args->pag = xfs_perag_get(mp, args->agno);
-
-		if (minimum_agno > args->agno) {
-			trace_xfs_alloc_vextent_skip_deadlock(args);
-			error = 0;
-			break;
-		}
-
-		error = xfs_alloc_fix_freelist(args, 0);
-		if (error) {
-			trace_xfs_alloc_vextent_nofix(args);
-			goto error0;
-		}
-		if (!args->agbp) {
-			trace_xfs_alloc_vextent_noagbp(args);
-			break;
-		}
-		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-		if ((error = xfs_alloc_ag_vextent(args)))
-			goto error0;
-		break;
-	case XFS_ALLOCTYPE_START_BNO:
-		/*
-		 * Try near allocation first, then anywhere-in-ag after
-		 * the first a.g. fails.
-		 */
-		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
-		    xfs_is_inode32(mp)) {
-			args->fsbno = XFS_AGB_TO_FSB(mp,
-					((mp->m_agfrotor / rotorstep) %
-					mp->m_sb.sb_agcount), 0);
-			bump_rotor = 1;
-		}
-		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
-		args->type = XFS_ALLOCTYPE_NEAR_BNO;
-		fallthrough;
-	case XFS_ALLOCTYPE_FIRST_AG:
-		/*
-		 * Rotate through the allocation groups looking for a winner.
-		 * If we are blocking, we must obey minimum_agno constraints
-		 * for avoiding ABBA deadlocks on AGF locking.
-		 */
-		if (type == XFS_ALLOCTYPE_FIRST_AG) {
-			/*
-			 * Start with allocation group given by bno.
-			 */
-			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-			args->type = XFS_ALLOCTYPE_THIS_AG;
-			sagno = minimum_agno;
-			flags = 0;
-		} else {
-			/*
-			 * Start with the given allocation group.
-			 */
-			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
-			flags = XFS_ALLOC_FLAG_TRYLOCK;
-		}
-
-		/*
-		 * Loop over allocation groups twice; first time with
-		 * trylock set, second time without.
-		 */
-		for (;;) {
-			args->pag = xfs_perag_get(mp, args->agno);
-			error = xfs_alloc_fix_freelist(args, flags);
-			if (error) {
-				trace_xfs_alloc_vextent_nofix(args);
-				goto error0;
-			}
-			/*
-			 * If we get a buffer back then the allocation will fly.
-			 */
-			if (args->agbp) {
-				if ((error = xfs_alloc_ag_vextent(args)))
-					goto error0;
-				break;
-			}
-
-			trace_xfs_alloc_vextent_loopfailed(args);
-
-			/*
-			 * Didn't work, figure out the next iteration.
-			 */
-			if (args->agno == sagno &&
-			    type == XFS_ALLOCTYPE_START_BNO)
-				args->type = XFS_ALLOCTYPE_THIS_AG;
-
-			/*
-			 * If we are try-locking, we can't deadlock on AGF
-			 * locks, so we can wrap all the way back to the first
-			 * AG. Otherwise, wrap back to the start AG so we can't
-			 * deadlock, and let the end of scan handler decide what
-			 * to do next.
-			 */
-			if (++(args->agno) == mp->m_sb.sb_agcount) {
-				if (flags & XFS_ALLOC_FLAG_TRYLOCK)
-					args->agno = 0;
-				else
-					args->agno = sagno;
-			}
-
-			/*
-			 * Reached the starting a.g., must either be done
-			 * or switch to non-trylock mode.
-			 */
-			if (args->agno == sagno) {
-				if (flags == 0) {
-					args->agbno = NULLAGBLOCK;
-					trace_xfs_alloc_vextent_allfailed(args);
-					break;
-				}
-
-				/*
-				 * Blocking pass next, so we must obey minimum
-				 * agno constraints to avoid ABBA AGF deadlocks.
-				 */
-				flags = 0;
-				if (minimum_agno > sagno)
-					sagno = minimum_agno;
-
-				if (type == XFS_ALLOCTYPE_START_BNO) {
-					args->agbno = XFS_FSB_TO_AGBNO(mp,
-							args->fsbno);
-					args->type = XFS_ALLOCTYPE_NEAR_BNO;
-				}
-			}
-			xfs_perag_put(args->pag);
-		}
-		if (bump_rotor) {
-			if (args->agno == sagno)
-				mp->m_agfrotor = (mp->m_agfrotor + 1) %
-					(mp->m_sb.sb_agcount * rotorstep);
-			else
-				mp->m_agfrotor = (args->agno * rotorstep + 1) %
-					(mp->m_sb.sb_agcount * rotorstep);
-		}
-		break;
-	default:
-		ASSERT(0);
-		/* NOTREACHED */
-	}
-	if (args->agbno == NULLAGBLOCK) {
-		args->fsbno = NULLFSBLOCK;
-	} else {
-		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
-#ifdef DEBUG
-		ASSERT(args->len >= args->minlen);
-		ASSERT(args->len <= args->maxlen);
-		ASSERT(args->agbno % args->alignment == 0);
-		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
-			args->len);
-#endif
-	}
-
-	/*
-	 * We end up here with a locked AGF. If we failed, the caller is likely
-	 * going to try to allocate again with different parameters, and that
-	 * can widen the AGs that are searched for free space. If we have to do
-	 * BMBT block allocation, we have to do a new allocation.
-	 *
-	 * Hence leaving this function with the AGF locked opens up potential
-	 * ABBA AGF deadlocks because a future allocation attempt in this
-	 * transaction may attempt to lock a lower number AGF.
-	 *
-	 * We can't release the AGF until the transaction is committed, so at
-	 * this point we must update the "firstblock" tracker to point at this
-	 * AG if the tracker is empty or points to a lower AG. This allows the
-	 * next allocation attempt to be modified appropriately to avoid
-	 * deadlocks.
-	 */
-	if (args->agbp &&
-	    (args->tp->t_highest_agno == NULLAGNUMBER ||
-	     args->pag->pag_agno > minimum_agno))
-		args->tp->t_highest_agno = args->pag->pag_agno;
-	xfs_perag_put(args->pag);
-	return 0;
-error0:
-	xfs_perag_put(args->pag);
-	return error;
-}
+		return -ENOSPC;
+	}
+	return 0;
+}
+
+/*
+ * Post-process allocation results to set the allocated block number correctly
+ * for the caller.
+ *
+ * XXX: xfs_alloc_vextent() should really be returning ENOSPC for ENOSPC, not
+ * hiding it behind a "successful" NULLFSBLOCK allocation.
+ */
+static void
+xfs_alloc_vextent_set_fsbno(
+	struct xfs_alloc_arg	*args,
+	xfs_agnumber_t		minimum_agno)
+{
+	struct xfs_mount	*mp = args->mp;
+
+	/*
+	 * We can end up here with a locked AGF. If we failed, the caller is
+	 * likely going to try to allocate again with different parameters, and
+	 * that can widen the AGs that are searched for free space. If we have
+	 * to do BMBT block allocation, we have to do a new allocation.
+	 *
+	 * Hence leaving this function with the AGF locked opens up potential
+	 * ABBA AGF deadlocks because a future allocation attempt in this
+	 * transaction may attempt to lock a lower number AGF.
+	 *
+	 * We can't release the AGF until the transaction is committed, so at
+	 * this point we must update the "first allocation" tracker to point at
+	 * this AG if the tracker is empty or points to a lower AG. This allows
+	 * the next allocation attempt to be modified appropriately to avoid
+	 * deadlocks.
+	 */
+	if (args->agbp &&
+	    (args->tp->t_highest_agno == NULLAGNUMBER ||
+	     args->agno > minimum_agno))
+		args->tp->t_highest_agno = args->agno;
+
+	/* Allocation failed with ENOSPC if NULLAGBLOCK was returned. */
+	if (args->agbno == NULLAGBLOCK) {
+		args->fsbno = NULLFSBLOCK;
+		return;
+	}
+
+	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+#ifdef DEBUG
+	ASSERT(args->len >= args->minlen);
+	ASSERT(args->len <= args->maxlen);
+	ASSERT(args->agbno % args->alignment == 0);
+	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
+#endif
+}
+
+/*
+ * Allocate within a single AG only.
+ */
+static int
+xfs_alloc_vextent_this_ag(
+	struct xfs_alloc_arg	*args,
+	xfs_agnumber_t		minimum_agno)
+{
+	struct xfs_mount	*mp = args->mp;
+	int			error;
+
+	error = xfs_alloc_vextent_check_args(args);
+	if (error) {
+		if (error == -ENOSPC)
+			return 0;
+		return error;
+	}
+
+	args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
+	if (minimum_agno > args->agno) {
+		trace_xfs_alloc_vextent_skip_deadlock(args);
+		args->fsbno = NULLFSBLOCK;
+		return 0;
+	}
+
+	args->pag = xfs_perag_get(mp, args->agno);
+	error = xfs_alloc_fix_freelist(args, 0);
+	if (error) {
+		trace_xfs_alloc_vextent_nofix(args);
+		goto out_error;
+	}
+	if (!args->agbp) {
+		trace_xfs_alloc_vextent_noagbp(args);
+		args->fsbno = NULLFSBLOCK;
+		goto out_error;
+	}
+
+	args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+	error = xfs_alloc_ag_vextent(args);
+
+	xfs_alloc_vextent_set_fsbno(args, minimum_agno);
+out_error:
+	xfs_perag_put(args->pag);
+	return error;
+}
+
+/*
+ * Iterate all AGs trying to allocate an extent starting from @start_agno.
+ *
+ * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
+ * allocation attempts in @start_agno have locality information. If we fail to
+ * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
+ * we attempt to allocate in as there is no locality optimisation possible for
+ * those allocations.
+ *
+ * When we wrap the AG iteration at the end of the filesystem, we have to be
+ * careful not to wrap into AGs below ones we already have locked in the
+ * transaction if we are doing a blocking iteration. This will result in an
+ * out-of-order locking of AGFs and hence can cause deadlocks.
+ */
+static int
+xfs_alloc_vextent_iterate_ags(
+	struct xfs_alloc_arg	*args,
+	xfs_agnumber_t		minimum_agno,
+	xfs_agnumber_t		start_agno,
+	uint32_t		flags)
+{
+	struct xfs_mount	*mp = args->mp;
+	int			error = 0;
+
+	ASSERT(start_agno >= minimum_agno);
+
+	/*
+	 * Loop over allocation groups twice; first time with
+	 * trylock set, second time without.
+	 */
+	args->agno = start_agno;
+	for (;;) {
+		args->pag = xfs_perag_get(mp, args->agno);
+		error = xfs_alloc_fix_freelist(args, flags);
+		if (error) {
+			trace_xfs_alloc_vextent_nofix(args);
+			break;
+		}
+
+		/*
+		 * If we get a buffer back then the allocation will fly.
+		 */
+		if (args->agbp) {
+			error = xfs_alloc_ag_vextent(args);
+			break;
+		}
+
+		trace_xfs_alloc_vextent_loopfailed(args);
+
+		/*
+		 * Didn't work, figure out the next iteration.
+		 */
+		if (args->agno == start_agno &&
+		    args->otype == XFS_ALLOCTYPE_START_BNO)
+			args->type = XFS_ALLOCTYPE_THIS_AG;
+
+		/*
+		 * If we are try-locking, we can't deadlock on AGF locks so we
+		 * can wrap all the way back to the first AG. Otherwise, wrap
+		 * back to the start AG so we can't deadlock and let the end of
+		 * scan handler decide what to do next.
+		 */
+		if (++(args->agno) == mp->m_sb.sb_agcount) {
+			if (flags & XFS_ALLOC_FLAG_TRYLOCK)
+				args->agno = 0;
+			else
+				args->agno = minimum_agno;
+		}
+
+		/*
+		 * Reached the starting a.g., must either be done
+		 * or switch to non-trylock mode.
+		 */
+		if (args->agno == start_agno) {
+			if (flags == 0) {
+				args->agbno = NULLAGBLOCK;
+				trace_xfs_alloc_vextent_allfailed(args);
+				break;
+			}
+
+			flags = 0;
+			if (args->otype == XFS_ALLOCTYPE_START_BNO) {
+				args->agbno = XFS_FSB_TO_AGBNO(mp,
+						args->fsbno);
+				args->type = XFS_ALLOCTYPE_NEAR_BNO;
+			}
+		}
+		xfs_perag_put(args->pag);
+		args->pag = NULL;
+	}
+	if (args->pag) {
+		xfs_perag_put(args->pag);
+		args->pag = NULL;
+	}
+	return error;
+}
+
+/*
+ * Iterate the AGs from the start AG to the end of the filesystem, trying to
+ * allocate blocks. It starts with a near allocation attempt in the initial
+ * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
+ * back to zero if allowed by previous allocations in this transaction,
+ * otherwise will wrap back to the start AG and run a second blocking pass to
+ * the end of the filesystem.
+ */
+static int
+xfs_alloc_vextent_start_ag(
+	struct xfs_alloc_arg	*args,
+	xfs_agnumber_t		minimum_agno)
+{
+	struct xfs_mount	*mp = args->mp;
+	xfs_agnumber_t		start_agno;
+	xfs_agnumber_t		rotorstep = xfs_rotorstep;
+	bool			bump_rotor = false;
+	int			error;
+
+	error = xfs_alloc_vextent_check_args(args);
+	if (error) {
+		if (error == -ENOSPC)
+			return 0;
+		return error;
+	}
+
+	if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
+	    xfs_is_inode32(mp)) {
+		args->fsbno = XFS_AGB_TO_FSB(mp,
+				((mp->m_agfrotor / rotorstep) %
+				mp->m_sb.sb_agcount), 0);
+		bump_rotor = 1;
+	}
+
+	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
+	args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
+	args->type = XFS_ALLOCTYPE_NEAR_BNO;
+
+	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
+			XFS_ALLOC_FLAG_TRYLOCK);
+	if (bump_rotor) {
+		if (args->agno == start_agno)
+			mp->m_agfrotor = (mp->m_agfrotor + 1) %
+				(mp->m_sb.sb_agcount * rotorstep);
+		else
+			mp->m_agfrotor = (args->agno * rotorstep + 1) %
+				(mp->m_sb.sb_agcount * rotorstep);
+	}
+
+	xfs_alloc_vextent_set_fsbno(args, minimum_agno);
+	return error;
+}
+
+/*
+ * Iterate from the agno indicated by args->fsbno through to the end of the
+ * filesystem attempting blocking allocation. This does not wrap or try a
+ * second pass, so will not recurse into AGs lower than indicated by fsbno.
+ */
+static int
+xfs_alloc_vextent_first_ag(
+	struct xfs_alloc_arg	*args,
+	xfs_agnumber_t		minimum_agno)
+{
+	struct xfs_mount	*mp = args->mp;
+	xfs_agnumber_t		start_agno;
+	int			error;
+
+	error = xfs_alloc_vextent_check_args(args);
+	if (error) {
+		if (error == -ENOSPC)
+			return 0;
+		return error;
+	}
+
+	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, args->fsbno));
+	args->type = XFS_ALLOCTYPE_THIS_AG;
+	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno,
+			start_agno, 0);
+	xfs_alloc_vextent_set_fsbno(args, minimum_agno);
+	return error;
+}
+
+/*
+ * Allocate an extent (variable-size).
+ * Depending on the allocation type, we either look in a single allocation
+ * group or loop over the allocation groups to find the result.
+ */
+int
+xfs_alloc_vextent(
+	struct xfs_alloc_arg	*args)
+{
+	xfs_agnumber_t		minimum_agno = 0;
+
+	if (args->tp->t_highest_agno != NULLAGNUMBER)
+		minimum_agno = args->tp->t_highest_agno;
+
+	switch (args->type) {
+	case XFS_ALLOCTYPE_THIS_AG:
+	case XFS_ALLOCTYPE_NEAR_BNO:
+	case XFS_ALLOCTYPE_THIS_BNO:
+		return xfs_alloc_vextent_this_ag(args, minimum_agno);
+	case XFS_ALLOCTYPE_START_BNO:
+		return xfs_alloc_vextent_start_ag(args, minimum_agno);
+	case XFS_ALLOCTYPE_FIRST_AG:
+		return xfs_alloc_vextent_first_ag(args, minimum_agno);
+	default:
+		ASSERT(0);
+		/* NOTREACHED */
+	}
+	/* Should never get here */
+	return -EFSCORRUPTED;
+}
 
 /* Ensure that the freelist is at full capacity. */
 int
 xfs_free_extent_fix_freelist(
......
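Of the new helpers, xfs_alloc_vextent_iterate_ags() carries the subtle logic: a trylock pass that may wrap to AG 0, followed by a blocking pass that may only wrap back to minimum_agno. Below is a self-contained model of just that visit order; the perag references, freelist fixup and the allocator itself are stubbed out, and AG_COUNT plus the t/b suffixes (trylock/blocking) are inventions of the sketch, not kernel names.

	#include <stdbool.h>
	#include <stdio.h>

	#define AG_COUNT	8	/* hypothetical filesystem with 8 AGs */

	/*
	 * Print the order in which AGs would be probed when every attempt
	 * fails: a trylock pass that may wrap to AG 0, then a blocking pass
	 * that may only wrap back as far as minimum_agno so AGF locks are
	 * always taken in ascending order.
	 */
	static void visit_order(int start_agno, int minimum_agno)
	{
		bool trylock = true;
		int agno = start_agno;

		printf("start=%d min=%d:", start_agno, minimum_agno);
		for (;;) {
			printf(" %d%s", agno, trylock ? "t" : "b");

			/* Wrap: trylock may go to 0, blocking only to the floor. */
			if (++agno == AG_COUNT)
				agno = trylock ? 0 : minimum_agno;

			/* Back at the start: either done, or switch to blocking. */
			if (agno == start_agno) {
				if (!trylock)
					break;	/* both passes failed: ENOSPC */
				trylock = false;
			}
		}
		printf("\n");
	}

	int main(void)
	{
		visit_order(5, 2);	/* trylock wraps to 0; blocking only to 2 */
		visit_order(0, 0);	/* no wrapping constraint at all */
		return 0;
	}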
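xfs_alloc_vextent_start_ag() also preserves the inode32 rotor behaviour. Here is a standalone sketch of the rotor arithmetic alone; ROTORSTEP is set to 3 purely to make the stepping visible (the kernel's xfs_rotorstep sysctl defaults to 1), and the sketch assumes every allocation succeeds in its starting AG.

	#include <stdio.h>

	#define AG_COUNT	4
	#define ROTORSTEP	3	/* models xfs_rotorstep */

	static unsigned int agfrotor;	/* models mp->m_agfrotor */

	/* Pick the starting AG for an inode32 initial-data allocation. */
	static unsigned int rotor_start_ag(void)
	{
		return (agfrotor / ROTORSTEP) % AG_COUNT;
	}

	/*
	 * Models the bump_rotor update: a win in the starting AG advances the
	 * rotor one step; a win elsewhere snaps the rotor to the winning AG.
	 */
	static void rotor_advance(unsigned int start_agno, unsigned int won_agno)
	{
		if (won_agno == start_agno)
			agfrotor = (agfrotor + 1) % (AG_COUNT * ROTORSTEP);
		else
			agfrotor = (won_agno * ROTORSTEP + 1) %
					(AG_COUNT * ROTORSTEP);
	}

	int main(void)
	{
		/* Each AG serves ROTORSTEP allocations before the rotor moves on. */
		for (int i = 0; i < 8; i++) {
			unsigned int start = rotor_start_ag();

			printf("allocation %d starts in AG %u\n", i, start);
			rotor_advance(start, start);
		}
		return 0;
	}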
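Finally, the minimum_agno plumbing exists to keep AGF lock ordering ascending within a transaction. The toy model below illustrates the t_highest_agno invariant that xfs_alloc_vextent_set_fsbno() maintains and the policy helpers consult; the function name and the clamping here are illustrative only, not the kernel interface. Because a later allocation in the same transaction is never allowed to lock an AGF below the highest one already held, two transactions can never acquire a pair of AGFs in opposite orders, which is precisely the ABBA deadlock the commit is guarding against.

	#include <assert.h>
	#include <stdio.h>

	#define NULLAGNUMBER	(-1)

	/* Models tp->t_highest_agno: highest AGF this transaction holds locked. */
	static int t_highest_agno = NULLAGNUMBER;

	/*
	 * Model of locking an AGF during an allocation: the search floor is
	 * clamped to the tracker, so AGF locks are only ever taken in
	 * ascending AG order within the transaction.
	 */
	static int lock_agf_for_alloc(int wanted_agno)
	{
		int minimum_agno = (t_highest_agno == NULLAGNUMBER) ?
					0 : t_highest_agno;
		int agno = wanted_agno < minimum_agno ? minimum_agno : wanted_agno;

		assert(t_highest_agno == NULLAGNUMBER || agno >= t_highest_agno);
		if (t_highest_agno == NULLAGNUMBER || agno > t_highest_agno)
			t_highest_agno = agno;	/* held until commit */
		return agno;
	}

	int main(void)
	{
		/* A later allocation wants AG 1 after AG 3 is already locked... */
		printf("first allocation locks AG %d\n", lock_agf_for_alloc(3));
		/* ...but the floor forces it up to AG 3, keeping order ascending. */
		printf("second allocation locks AG %d\n", lock_agf_for_alloc(1));
		return 0;
	}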