diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index b06ede1d0bed4562e60e4067e729b9209de65e1d..f5e2a19e0f8eb25114fa8de3478b289d4972b8a9 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -985,10 +985,22 @@ xfs_ioctl_setattr( /* * Extent size must be a multiple of the appropriate block - * size, if set at all. + * size, if set at all. It must also be smaller than the + * maximum extent size supported by the filesystem. + * + * Also, for non-realtime files, limit the extent size hint to + * half the size of the AGs in the filesystem so alignment + * doesn't result in extents larger than an AG. */ if (fa->fsx_extsize != 0) { - xfs_extlen_t size; + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) { + code = XFS_ERROR(EINVAL); + goto error_return; + } if (XFS_IS_REALTIME_INODE(ip) || ((mask & FSX_XFLAGS) && @@ -997,6 +1009,10 @@ xfs_ioctl_setattr( mp->m_sb.sb_blocklog; } else { size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { + code = XFS_ERROR(EINVAL); + goto error_return; + } } if (fa->fsx_extsize % size) { diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f8e854b4fde87c49f17ff07dc87893b172bb6203..206a2815ced67399c9cfa248a45d185605e12aee 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void) xfs_dquot_t *dqpout; xfs_dquot_t *dqp; int restarts; + int startagain; restarts = 0; dqpout = NULL; /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ -startagain: +again: + startagain = 0; mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { @@ -1885,13 +1887,10 @@ xfs_qm_dqreclaim_one(void) ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); trace_xfs_dqreclaim_want(dqp); - - xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return NULL; XQM_STATS_INC(xqmstats.xs_qm_dqwants); - goto startagain; + restarts++; + startagain = 1; + goto dqunlock; } /* @@ -1906,23 +1905,20 @@ xfs_qm_dqreclaim_one(void) ASSERT(list_empty(&dqp->q_mplist)); list_del_init(&dqp->q_freelist); xfs_Gqm->qm_dqfrlist_cnt--; - xfs_dqunlock(dqp); dqpout = dqp; XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - break; + goto dqunlock; } ASSERT(dqp->q_hash); ASSERT(!list_empty(&dqp->q_mplist)); /* - * Try to grab the flush lock. If this dquot is in the process of - * getting flushed to disk, we don't want to reclaim it. + * Try to grab the flush lock. If this dquot is in the process + * of getting flushed to disk, we don't want to reclaim it. */ - if (!xfs_dqflock_nowait(dqp)) { - xfs_dqunlock(dqp); - continue; - } + if (!xfs_dqflock_nowait(dqp)) + goto dqunlock; /* * We have the flush lock so we know that this is not in the @@ -1944,8 +1940,7 @@ xfs_qm_dqreclaim_one(void) xfs_fs_cmn_err(CE_WARN, mp, "xfs_qm_dqreclaim: dquot %p flush failed", dqp); } - xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ - continue; + goto dqunlock; } /* @@ -1967,13 +1962,8 @@ xfs_qm_dqreclaim_one(void) */ if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { restarts++; - mutex_unlock(&dqp->q_hash->qh_lock); - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) - return NULL; - goto startagain; + startagain = 1; + goto qhunlock; } ASSERT(dqp->q_nrefs == 0); @@ -1986,14 +1976,20 @@ xfs_qm_dqreclaim_one(void) xfs_Gqm->qm_dqfrlist_cnt--; dqpout = dqp; mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); +qhunlock: mutex_unlock(&dqp->q_hash->qh_lock); dqfunlock: xfs_dqfunlock(dqp); +dqunlock: xfs_dqunlock(dqp); if (dqpout) break; if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - return NULL; + break; + if (startagain) { + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + goto again; + } } mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); return dqpout; diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 0ab56b32c7eb70484ed4a7fdcfdbac3595c1ecab..d0b3bc72005bd147557625e4afabdca058c6e163 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -74,6 +74,22 @@ typedef unsigned int xfs_alloctype_t; */ #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) +/* + * When deciding how much space to allocate out of an AG, we limit the + * allocation maximum size to the size the AG. However, we cannot use all the + * blocks in the AG - some are permanently used by metadata. These + * blocks are generally: + * - the AG superblock, AGF, AGI and AGFL + * - the AGF (bno and cnt) and AGI btree root blocks + * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits + * + * The AG headers are sector sized, so the amount of space they take up is + * dependent on filesystem geometry. The others are all single blocks. + */ +#define XFS_ALLOC_AG_MAX_USABLE(mp) \ + ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) + + /* * Argument structure for xfs_alloc routines. * This is turned into a structure to avoid having 20 arguments passed diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 4111cd3966c764b3c41300d1e7a51a7579ead435..dc3afd7739ff40754d1e04eda13178acc78ef3c2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real( * Filling in the middle part of a previous delayed allocation. * Contiguity is impossible here. * This case is avoided almost all the time. + * + * We start with a delayed allocation: + * + * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ + * PREV @ idx + * + * and we are allocating: + * +rrrrrrrrrrrrrrrrr+ + * new + * + * and we set it up for insertion as: + * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ + * new + * PREV @ idx LEFT RIGHT + * inserted at idx + 1 */ temp = new->br_startoff - PREV.br_startoff; - trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - r[0] = *new; - r[1].br_state = PREV.br_state; - r[1].br_startblock = 0; - r[1].br_startoff = new_endoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - r[1].br_blockcount = temp2; - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); + trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); + xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ + LEFT = *new; + RIGHT.br_state = PREV.br_state; + RIGHT.br_startblock = nullstartblock( + (int)xfs_bmap_worst_indlen(ip, temp2)); + RIGHT.br_startoff = new_endoff; + RIGHT.br_blockcount = temp2; + /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ + xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) @@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb( startag = ag = 0; pag = xfs_perag_get(mp, ag); - while (*blen < ap->alen) { + while (*blen < args->maxlen) { if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, args->tp, ag, XFS_ALLOC_FLAG_TRYLOCK); @@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb( notinit = 1; if (xfs_inode_is_filestream(ap->ip)) { - if (*blen >= ap->alen) + if (*blen >= args->maxlen) break; if (ap->userdata) { @@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb( * If the best seen length is less than the request * length, use the best as the minimum. */ - else if (*blen < ap->alen) + else if (*blen < args->maxlen) args->minlen = *blen; /* - * Otherwise we've seen an extent as big as alen, + * Otherwise we've seen an extent as big as maxlen, * use that as the minimum. */ else - args->minlen = ap->alen; + args->minlen = args->maxlen; /* * set the failure fallback case to look in the selected @@ -2573,7 +2590,9 @@ xfs_bmap_btalloc( args.tp = ap->tp; args.mp = mp; args.fsbno = ap->rval; - args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); + + /* Trim the allocation back to the maximum an AG can fit. */ + args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); args.firstblock = ap->firstblock; blen = 0; if (nullfb) { @@ -2621,7 +2640,7 @@ xfs_bmap_btalloc( /* * Adjust for alignment */ - if (blen > args.alignment && blen <= ap->alen) + if (blen > args.alignment && blen <= args.maxlen) args.minlen = blen - args.alignment; args.minalignslop = 0; } else { @@ -2640,7 +2659,7 @@ xfs_bmap_btalloc( * of minlen+alignment+slop doesn't go up * between the calls. */ - if (blen > mp->m_dalign && blen <= ap->alen) + if (blen > mp->m_dalign && blen <= args.maxlen) nextminlen = blen - mp->m_dalign; else nextminlen = args.minlen; @@ -4485,6 +4504,16 @@ xfs_bmapi( /* Figure out the extent size, adjust alen */ extsz = xfs_get_extsz_hint(ip); if (extsz) { + /* + * make sure we don't exceed a single + * extent length when we align the + * extent by reducing length we are + * going to allocate by the maximum + * amount extent size aligment may + * require. + */ + alen = XFS_FILBLKS_MIN(len, + MAXEXTLEN - (2 * extsz - 1)); error = xfs_bmap_extsize_align(mp, &got, &prev, extsz, rt, eof, diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 98c6f73b675218bded1a6b326ecdf349cb241a07..6f8c21ce0d6d95fd8c45a5d3eea4ef0240fad1fd 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -427,13 +427,15 @@ xfs_buf_item_unpin( if (remove) { /* - * We have to remove the log item from the transaction - * as we are about to release our reference to the - * buffer. If we don't, the unlock that occurs later - * in xfs_trans_uncommit() will ry to reference the + * If we are in a transaction context, we have to + * remove the log item from the transaction as we are + * about to release our reference to the buffer. If we + * don't, the unlock that occurs later in + * xfs_trans_uncommit() will try to reference the * buffer which we no longer have a hold on. */ - xfs_trans_del_item(lip); + if (lip->li_desc) + xfs_trans_del_item(lip); /* * Since the transaction no longer refers to the buffer, diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 75f2ef60e579e62f483213fa6a038cb235698054..d22e62623437aafb79d62f5492252e140f3a6cb7 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -138,7 +138,8 @@ xfs_efi_item_unpin( if (remove) { ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); - xfs_trans_del_item(lip); + if (lip->li_desc) + xfs_trans_del_item(lip); xfs_efi_item_free(efip); return; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 55582bd66659140ed9592a85ec0189187bcfd4ac..8a0f044750c3207eff174c4137bcd77580465894 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -337,7 +337,12 @@ xfs_iomap_prealloc_size( int shift = 0; int64_t freesp; - alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); + /* + * rounddown_pow_of_two() returns an undefined result + * if we pass in alloc_blocks = 0. Hence the "+ 1" to + * ensure we always pass in a non-zero value. + */ + alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, rounddown_pow_of_two(alloc_blocks)); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 916eb7db14d9a09ff541882bf12905d9fd5068ea..3bd3291ef8d21fc09697b3c4aa5f7b76df4d1c87 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); -int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, +void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_log_vec *log_vector, xfs_lsn_t *commit_lsn, int flags); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9dc8125d04e5e09188c88695c42b13a4ae9b6a9b..9ca59be089779d6f3c4e570c926545d6441b85da 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -543,7 +543,7 @@ xlog_cil_push( error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); if (error) - goto out_abort; + goto out_abort_free_ticket; /* * now that we've written the checkpoint into the log, strictly @@ -569,8 +569,9 @@ xlog_cil_push( } spin_unlock(&cil->xc_cil_lock); + /* xfs_log_done always frees the ticket on error. */ commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); - if (error || commit_lsn == -1) + if (commit_lsn == -1) goto out_abort; /* attach all the transactions w/ busy extents to iclog */ @@ -600,6 +601,8 @@ xlog_cil_push( kmem_free(new_ctx); return 0; +out_abort_free_ticket: + xfs_log_ticket_put(tic); out_abort: xlog_cil_committed(ctx, XFS_LI_ABORTED); return XFS_ERROR(EIO); @@ -622,7 +625,7 @@ xlog_cil_push( * background commit, returns without it held once background commits are * allowed again. */ -int +void xfs_log_commit_cil( struct xfs_mount *mp, struct xfs_trans *tp, @@ -637,11 +640,6 @@ xfs_log_commit_cil( if (flags & XFS_TRANS_RELEASE_LOG_RES) log_flags = XFS_LOG_REL_PERM_RESERV; - if (XLOG_FORCED_SHUTDOWN(log)) { - xlog_cil_free_logvec(log_vector); - return XFS_ERROR(EIO); - } - /* * do all the hard work of formatting items (including memory * allocation) outside the CIL context lock. This prevents stalling CIL @@ -701,7 +699,6 @@ xfs_log_commit_cil( */ if (push) xlog_cil_push(log, 0); - return 0; } /* diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 33dbc4e0ad622a94dbbf8aa67b49e0eacef0cbbc..76922793f64fa0d58606a4411f008ba8b9e88259 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert( * Bulk operation version of xfs_trans_committed that takes a log vector of * items to insert into the AIL. This uses bulk AIL insertion techniques to * minimise lock traffic. + * + * If we are called with the aborted flag set, it is because a log write during + * a CIL checkpoint commit has failed. In this case, all the items in the + * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which + * means that checkpoint commit abort handling is treated exactly the same + * as an iclog write error even though we haven't started any IO yet. Hence in + * this case all we need to do is IOP_COMMITTED processing, followed by an + * IOP_UNPIN(aborted) call. */ void xfs_trans_committed_bulk( @@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk( if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; + /* + * if we are aborting the operation, no point in inserting the + * object into the AIL as we are in a shutdown situation. + */ + if (aborted) { + ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); + IOP_UNPIN(lip, 1); + continue; + } + if (item_lsn != commit_lsn) { /* @@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk( } /* - * Called from the trans_commit code when we notice that - * the filesystem is in the middle of a forced shutdown. + * Called from the trans_commit code when we notice that the filesystem is in + * the middle of a forced shutdown. + * + * When we are called here, we have already pinned all the items in the + * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called + * so we can simply walk the items in the transaction, unpin them with an abort + * flag and then free the items. Note that unpinning the items can result in + * them being freed immediately, so we need to use a safe list traversal method + * here. */ STATIC void xfs_trans_uncommit( struct xfs_trans *tp, uint flags) { - struct xfs_log_item_desc *lidp; + struct xfs_log_item_desc *lidp, *n; - list_for_each_entry(lidp, &tp->t_items, lid_trans) { - /* - * Unpin all but those that aren't dirty. - */ + list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { if (lidp->lid_flags & XFS_LID_DIRTY) IOP_UNPIN(lidp->lid_item, 1); } @@ -1733,7 +1755,6 @@ xfs_trans_commit_cil( int flags) { struct xfs_log_vec *log_vector; - int error; /* * Get each log item to allocate a vector structure for @@ -1744,9 +1765,7 @@ xfs_trans_commit_cil( if (!log_vector) return ENOMEM; - error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); - if (error) - return error; + xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free(tp);