提交 29ad0de2 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (22 commits)
  xfs: mark the xfssyncd workqueue as non-reentrant
  xfs: simplify xfs_qm_detach_gdquots
  xfs: fix acl count validation in xfs_acl_from_disk()
  xfs: remove unused XBT_FORCE_SLEEP bit
  xfs: remove XFS_QMOPT_DQSUSER
  xfs: kill xfs_qm_idtodq
  xfs: merge xfs_qm_dqinit_core into the only caller
  xfs: add a xfs_dqhold helper
  xfs: simplify xfs_qm_dqattach_grouphint
  xfs: nest qm_dqfrlist_lock inside the dquot qlock
  xfs: flatten the dquot lock ordering
  xfs: implement lazy removal for the dquot freelist
  xfs: remove XFS_DQ_INACTIVE
  xfs: cleanup xfs_qm_dqlookup
  xfs: cleanup dquot locking helpers
  xfs: remove the sync_mode argument to xfs_qm_dqflush_all
  xfs: remove xfs_qm_sync
  xfs: make sure to really flush all dquots in xfs_qm_quotacheck
  xfs: untangle SYNC_WAIT and SYNC_TRYLOCK meanings for xfs_qm_dqflush
  xfs: remove the lid_size field in struct log_item_desc
  ...

Fix up trivial conflict in fs/xfs/xfs_sync.c
......@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
struct xfs_acl_entry *ace;
int count, i;
unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
if (count > XFS_ACL_MAX_ENTRIES)
......
......@@ -1701,12 +1701,8 @@ xfsbufd(
struct list_head tmp;
struct blk_plug plug;
if (unlikely(freezing(current))) {
set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
if (unlikely(freezing(current)))
refrigerator();
} else {
clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
}
/* sleep for a long time if there is nothing to do. */
if (list_empty(&target->bt_delwri_queue))
......
......@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_DELWRI_Q, "DELWRI_Q" }
typedef enum {
XBT_FORCE_SLEEP = 0,
XBT_FORCE_FLUSH = 1,
XBT_FORCE_FLUSH = 0,
} xfs_buftarg_flags_t;
typedef struct xfs_buftarg {
......
......@@ -39,20 +39,19 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
/*
LOCK ORDER
inode lock (ilock)
dquot hash-chain lock (hashlock)
xqm dquot freelist lock (freelistlock
mount's dquot list lock (mplistlock)
user dquot lock - lock ordering among dquots is based on the uid or gid
group dquot lock - similar to udquots. Between the two dquots, the udquot
has to be locked first.
pin lock - the dquot lock must be held to take this lock.
flush lock - ditto.
*/
* Lock order:
*
* ip->i_lock
* qh->qh_lock
* qi->qi_dqlist_lock
* dquot->q_qlock (xfs_dqlock() and friends)
* dquot->q_flush (xfs_dqflock() and friends)
* xfs_Gqm->qm_dqfrlist_lock
*
* If two dquots need to be locked the order is user before group/project,
* otherwise by the lowest id first, see xfs_dqlock2.
*/
#ifdef DEBUG
xfs_buftarg_t *xfs_dqerror_target;
......@@ -154,24 +153,6 @@ xfs_qm_dqdestroy(
atomic_dec(&xfs_Gqm->qm_totaldquots);
}
/*
* This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
*/
STATIC void
xfs_qm_dqinit_core(
xfs_dqid_t id,
uint type,
xfs_dqblk_t *d)
{
/*
* Caller has zero'd the entire dquot 'chunk' already.
*/
d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_id = cpu_to_be32(id);
d->dd_diskdq.d_flags = type;
}
/*
* If default limits are in force, push them into the dquot now.
* We overwrite the dquot limits only if they are zero and this
......@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
curid = id - (id % q->qi_dqperchunk);
ASSERT(curid >= 0);
memset(d, 0, BBTOB(q->qi_dqchunklen));
for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
xfs_qm_dqinit_core(curid, type, d);
for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_id = cpu_to_be32(curid);
d->dd_diskdq.d_flags = type;
}
xfs_trans_dquot_buf(tp, bp,
(type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
......@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
* Read in the ondisk dquot using dqtobp() then copy it to an incore version,
* and release the buffer immediately.
*
* If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
*/
/* ARGSUSED */
STATIC int
int
xfs_qm_dqread(
xfs_trans_t **tpp,
xfs_dqid_t id,
xfs_dquot_t *dqp, /* dquot to get filled in */
uint flags)
struct xfs_mount *mp,
xfs_dqid_t id,
uint type,
uint flags,
struct xfs_dquot **O_dqpp)
{
xfs_disk_dquot_t *ddqp;
xfs_buf_t *bp;
int error;
xfs_trans_t *tp;
struct xfs_dquot *dqp;
struct xfs_disk_dquot *ddqp;
struct xfs_buf *bp;
struct xfs_trans *tp = NULL;
int error;
int cancelflags = 0;
ASSERT(tpp);
dqp = xfs_qm_dqinit(mp, id, type);
trace_xfs_dqread(dqp);
if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
XFS_WRITE_LOG_RES(mp) +
/*
* Round the chunklen up to the next multiple
* of 128 (buf log item chunk size)).
*/
BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
0,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
if (error)
goto error1;
cancelflags = XFS_TRANS_RELEASE_LOG_RES;
}
/*
* get a pointer to the on-disk dquot and the buffer containing it
* dqp already knows its own type (GROUP/USER).
*/
if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
return (error);
error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
if (error) {
/*
* This can happen if quotas got turned off (ESRCH),
* or if the dquot didn't exist on disk and we ask to
* allocate (ENOENT).
*/
trace_xfs_dqread_fail(dqp);
cancelflags |= XFS_TRANS_ABORT;
goto error1;
}
tp = *tpp;
/* copy everything from disk dquot to the incore dquot */
memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
xfs_qm_dquot_logitem_init(dqp);
/*
......@@ -622,77 +634,22 @@ xfs_qm_dqread(
ASSERT(xfs_buf_islocked(bp));
xfs_trans_brelse(tp, bp);
return (error);
}
/*
* allocate an incore dquot from the kernel heap,
* and fill its core with quota information kept on disk.
* If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
* if it wasn't already allocated.
*/
STATIC int
xfs_qm_idtodq(
xfs_mount_t *mp,
xfs_dqid_t id, /* gid or uid, depending on type */
uint type, /* UDQUOT or GDQUOT */
uint flags, /* DQALLOC, DQREPAIR */
xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
{
xfs_dquot_t *dqp;
int error;
xfs_trans_t *tp;
int cancelflags=0;
dqp = xfs_qm_dqinit(mp, id, type);
tp = NULL;
if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
XFS_WRITE_LOG_RES(mp) +
BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
128,
0,
XFS_TRANS_PERM_LOG_RES,
XFS_WRITE_LOG_COUNT);
if (error) {
cancelflags = 0;
goto error0;
}
cancelflags = XFS_TRANS_RELEASE_LOG_RES;
}
/*
* Read it from disk; xfs_dqread() takes care of
* all the necessary initialization of dquot's fields (locks, etc)
*/
if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
/*
* This can happen if quotas got turned off (ESRCH),
* or if the dquot didn't exist on disk and we ask to
* allocate (ENOENT).
*/
trace_xfs_dqread_fail(dqp);
cancelflags |= XFS_TRANS_ABORT;
goto error0;
}
if (tp) {
if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
goto error1;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error)
goto error0;
}
*O_dqpp = dqp;
return (0);
return error;
error0:
ASSERT(error);
error1:
if (tp)
xfs_trans_cancel(tp, cancelflags);
error1:
error0:
xfs_qm_dqdestroy(dqp);
*O_dqpp = NULL;
return (error);
return error;
}
/*
......@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;
uint flist_locked;
ASSERT(mutex_is_locked(&qh->qh_lock));
flist_locked = B_FALSE;
/*
* Traverse the hashchain looking for a match
*/
......@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
* dqlock to look at the id field of the dquot, since the
* id can't be modified without the hashlock anyway.
*/
if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
trace_xfs_dqlookup_found(dqp);
if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
continue;
/*
* All in core dquots must be on the dqlist of mp
*/
ASSERT(!list_empty(&dqp->q_mplist));
xfs_dqlock(dqp);
if (dqp->q_nrefs == 0) {
ASSERT(!list_empty(&dqp->q_freelist));
if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
trace_xfs_dqlookup_want(dqp);
/*
* We may have raced with dqreclaim_one()
* (and lost). So, flag that we don't
* want the dquot to be reclaimed.
*/
dqp->dq_flags |= XFS_DQ_WANT;
xfs_dqunlock(dqp);
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
xfs_dqlock(dqp);
dqp->dq_flags &= ~(XFS_DQ_WANT);
}
flist_locked = B_TRUE;
}
trace_xfs_dqlookup_found(dqp);
/*
* id couldn't have changed; we had the hashlock all
* along
*/
ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
if (flist_locked) {
if (dqp->q_nrefs != 0) {
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
flist_locked = B_FALSE;
} else {
/* take it off the freelist */
trace_xfs_dqlookup_freelist(dqp);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
}
}
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
*O_dqpp = NULL;
xfs_dqunlock(dqp);
return -1;
}
XFS_DQHOLD(dqp);
dqp->q_nrefs++;
if (flist_locked)
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
/*
* move the dquot to the front of the hashchain
*/
ASSERT(mutex_is_locked(&qh->qh_lock));
list_move(&dqp->q_hashlist, &qh->qh_list);
trace_xfs_dqlookup_done(dqp);
*O_dqpp = dqp;
return 0;
}
/*
* move the dquot to the front of the hashchain
*/
list_move(&dqp->q_hashlist, &qh->qh_list);
trace_xfs_dqlookup_done(dqp);
*O_dqpp = dqp;
return 0;
}
*O_dqpp = NULL;
ASSERT(mutex_is_locked(&qh->qh_lock));
return (1);
return 1;
}
/*
......@@ -829,11 +744,7 @@ xfs_qm_dqget(
return (EIO);
}
}
#endif
again:
#ifdef DEBUG
ASSERT(type == XFS_DQ_USER ||
type == XFS_DQ_PROJ ||
type == XFS_DQ_GROUP);
......@@ -845,13 +756,21 @@ xfs_qm_dqget(
ASSERT(ip->i_gdquot == NULL);
}
#endif
restart:
mutex_lock(&h->qh_lock);
/*
* Look in the cache (hashtable).
* The chain is kept locked during lookup.
*/
if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
case -1:
XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
mutex_unlock(&h->qh_lock);
delay(1);
goto restart;
case 0:
XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
/*
* The dquot was found, moved to the front of the chain,
......@@ -862,9 +781,11 @@ xfs_qm_dqget(
ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
mutex_unlock(&h->qh_lock);
trace_xfs_dqget_hit(*O_dqpp);
return (0); /* success */
return 0; /* success */
default:
XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
break;
}
XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
/*
* Dquot cache miss. We don't want to keep the inode lock across
......@@ -882,41 +803,18 @@ xfs_qm_dqget(
version = h->qh_version;
mutex_unlock(&h->qh_lock);
/*
* Allocate the dquot on the kernel heap, and read the ondisk
* portion off the disk. Also, do all the necessary initialization
* This can return ENOENT if dquot didn't exist on disk and we didn't
* ask it to allocate; ESRCH if quotas got turned off suddenly.
*/
if ((error = xfs_qm_idtodq(mp, id, type,
flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
XFS_QMOPT_DOWARN),
&dqp))) {
if (ip)
xfs_ilock(ip, XFS_ILOCK_EXCL);
return (error);
}
error = xfs_qm_dqread(mp, id, type, flags, &dqp);
/*
* See if this is mount code calling to look at the overall quota limits
* which are stored in the id == 0 user or group's dquot.
* Since we may not have done a quotacheck by this point, just return
* the dquot without attaching it to any hashtables, lists, etc, or even
* taking a reference.
* The caller must dqdestroy this once done.
*/
if (flags & XFS_QMOPT_DQSUSER) {
ASSERT(id == 0);
ASSERT(! ip);
goto dqret;
}
if (ip)
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
/*
* Dquot lock comes after hashlock in the lock ordering
*/
if (ip) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
/*
* A dquot could be attached to this inode by now, since
* we had dropped the ilock.
......@@ -961,16 +859,21 @@ xfs_qm_dqget(
* lock order between the two dquots here since dqp isn't
* on any findable lists yet.
*/
if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
case 0:
case -1:
/*
* Duplicate found. Just throw away the new dquot
* and start over.
* Duplicate found, either in cache or on its way out.
* Just throw away the new dquot and start over.
*/
xfs_qm_dqput(tmpdqp);
if (tmpdqp)
xfs_qm_dqput(tmpdqp);
mutex_unlock(&h->qh_lock);
xfs_qm_dqdestroy(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
goto again;
goto restart;
default:
break;
}
}
......@@ -1015,67 +918,49 @@ xfs_qm_dqget(
*/
void
xfs_qm_dqput(
xfs_dquot_t *dqp)
struct xfs_dquot *dqp)
{
xfs_dquot_t *gdqp;
struct xfs_dquot *gdqp;
ASSERT(dqp->q_nrefs > 0);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
trace_xfs_dqput(dqp);
if (dqp->q_nrefs != 1) {
dqp->q_nrefs--;
recurse:
if (--dqp->q_nrefs > 0) {
xfs_dqunlock(dqp);
return;
}
trace_xfs_dqput_free(dqp);
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
if (list_empty(&dqp->q_freelist)) {
list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
xfs_Gqm->qm_dqfrlist_cnt++;
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
/*
* drop the dqlock and acquire the freelist and dqlock
* in the right order; but try to get it out-of-order first
* If we just added a udquot to the freelist, then we want to release
* the gdquot reference that it (probably) has. Otherwise it'll keep
* the gdquot from getting reclaimed.
*/
if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
trace_xfs_dqput_wait(dqp);
xfs_dqunlock(dqp);
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
xfs_dqlock(dqp);
gdqp = dqp->q_gdquot;
if (gdqp) {
xfs_dqlock(gdqp);
dqp->q_gdquot = NULL;
}
xfs_dqunlock(dqp);
while (1) {
gdqp = NULL;
/* We can't depend on nrefs being == 1 here */
if (--dqp->q_nrefs == 0) {
trace_xfs_dqput_free(dqp);
list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
xfs_Gqm->qm_dqfrlist_cnt++;
/*
* If we just added a udquot to the freelist, then
* we want to release the gdquot reference that
* it (probably) has. Otherwise it'll keep the
* gdquot from getting reclaimed.
*/
if ((gdqp = dqp->q_gdquot)) {
/*
* Avoid a recursive dqput call
*/
xfs_dqlock(gdqp);
dqp->q_gdquot = NULL;
}
}
xfs_dqunlock(dqp);
/*
* If we had a group quota inside the user quota as a hint,
* release it now.
*/
if (! gdqp)
break;
/*
* If we had a group quota hint, release it now.
*/
if (gdqp) {
dqp = gdqp;
goto recurse;
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
}
/*
......@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
* If not dirty, or it's pinned and we are not supposed to block, nada.
*/
if (!XFS_DQ_IS_DIRTY(dqp) ||
(!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
xfs_dqfunlock(dqp);
return 0;
}
......@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
}
int
xfs_qm_dqlock_nowait(
xfs_dquot_t *dqp)
{
return mutex_trylock(&dqp->q_qlock);
}
void
xfs_dqlock(
xfs_dquot_t *dqp)
{
mutex_lock(&dqp->q_qlock);
}
void
xfs_dqunlock(
xfs_dquot_t *dqp)
{
mutex_unlock(&(dqp->q_qlock));
xfs_dqunlock_nonotify(dqp);
if (dqp->q_logitem.qli_dquot == dqp) {
/* Once was dqp->q_mount, but might just have been cleared */
xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
(xfs_log_item_t*)&(dqp->q_logitem));
&dqp->q_logitem.qli_item);
}
}
void
xfs_dqunlock_nonotify(
xfs_dquot_t *dqp)
{
mutex_unlock(&(dqp->q_qlock));
}
/*
* Lock two xfs_dquot structures.
*
......@@ -1319,43 +1181,18 @@ xfs_dqlock2(
}
}
/*
* Take a dquot out of the mount's dqlist as well as the hashlist.
* This is called via unmount as well as quotaoff, and the purge
* will always succeed unless there are soft (temp) references
* outstanding.
*
* This returns 0 if it was purged, 1 if it wasn't. It's not an error code
* that we're returning! XXXsup - not cool.
* Take a dquot out of the mount's dqlist as well as the hashlist. This is
* called via unmount as well as quotaoff, and the purge will always succeed.
*/
/* ARGSUSED */
int
void
xfs_qm_dqpurge(
xfs_dquot_t *dqp)
struct xfs_dquot *dqp)
{
xfs_dqhash_t *qh = dqp->q_hash;
xfs_mount_t *mp = dqp->q_mount;
ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
struct xfs_mount *mp = dqp->q_mount;
struct xfs_dqhash *qh = dqp->q_hash;
xfs_dqlock(dqp);
/*
* We really can't afford to purge a dquot that is
* referenced, because these are hard refs.
* It shouldn't happen in general because we went thru _all_ inodes in
* dqrele_all_inodes before calling this and didn't let the mountlock go.
* However it is possible that we have dquots with temporary
* references that are not attached to an inode. e.g. see xfs_setattr().
*/
if (dqp->q_nrefs != 0) {
xfs_dqunlock(dqp);
mutex_unlock(&dqp->q_hash->qh_lock);
return (1);
}
ASSERT(!list_empty(&dqp->q_freelist));
/*
* If we're turning off quotas, we have to make sure that, for
......@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
* Block on the flush lock after nudging dquot buffer,
* if it is incore.
*/
xfs_qm_dqflock_pushbuf_wait(dqp);
xfs_dqflock_pushbuf_wait(dqp);
}
/*
* XXXIf we're turning this type of quotas off, we don't care
* If we are turning this type of quotas off, we don't care
* about the dirty metadata sitting in this dquot. OTOH, if
* we're unmounting, we do care, so we flush it and wait.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
int error;
/* dqflush unlocks dqflock */
/*
* Given that dqpurge is a very rare occurrence, it is OK
* that we're holding the hashlist and mplist locks
* across the disk write. But, ... XXXsup
*
* We don't care about getting disk errors here. We need
* to purge this dquot anyway, so we go ahead regardless.
*/
......@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
__func__, dqp);
xfs_dqflock(dqp);
}
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
!(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
mutex_lock(&qh->qh_lock);
list_del_init(&dqp->q_hashlist);
qh->qh_version++;
mutex_unlock(&qh->qh_lock);
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dqreclaims++;
mp->m_quotainfo->qi_dquots--;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
/*
* XXX Move this to the front of the freelist, if we can get the
* freelist lock.
* We move dquots to the freelist as soon as their reference count
* hits zero, so it really should be on the freelist here.
*/
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
ASSERT(!list_empty(&dqp->q_freelist));
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
dqp->q_mount = NULL;
dqp->q_hash = NULL;
dqp->dq_flags = XFS_DQ_INACTIVE;
memset(&dqp->q_core, 0, sizeof(dqp->q_core));
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);
mutex_unlock(&qh->qh_lock);
return (0);
xfs_qm_dqdestroy(dqp);
}
/*
* Give the buffer a little push if it is incore and
* wait on the flush lock.
*/
void
xfs_qm_dqflock_pushbuf_wait(
xfs_dqflock_pushbuf_wait(
xfs_dquot_t *dqp)
{
xfs_mount_t *mp = dqp->q_mount;
......
......@@ -80,8 +80,6 @@ enum {
XFS_QLOCK_NESTED,
};
#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
/*
* Manage the q_flush completion queue embedded in the dquot. This completion
* queue synchronizes processes attempting to flush the in-core dquot back to
......@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
complete(&dqp->q_flush);
}
static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
{
return mutex_trylock(&dqp->q_qlock);
}
static inline void xfs_dqlock(struct xfs_dquot *dqp)
{
mutex_lock(&dqp->q_qlock);
}
static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
{
mutex_unlock(&dqp->q_qlock);
}
#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
......@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
(XFS_IS_UQUOTA_ON((d)->q_mount)) : \
(XFS_IS_OQUOTA_ON((d)->q_mount))))
extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
uint, struct xfs_dquot **);
extern void xfs_qm_dqdestroy(xfs_dquot_t *);
extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
extern int xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
xfs_disk_dquot_t *);
extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
......@@ -129,9 +142,17 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
xfs_dqid_t, uint, uint, xfs_dquot_t **);
extern void xfs_qm_dqput(xfs_dquot_t *);
extern void xfs_dqlock(xfs_dquot_t *);
extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
extern void xfs_dqunlock(xfs_dquot_t *);
extern void xfs_dqunlock_nonotify(xfs_dquot_t *);
extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
extern void xfs_dqunlock(struct xfs_dquot *);
extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
{
xfs_dqlock(dqp);
dqp->q_nrefs++;
xfs_dqunlock(dqp);
return dqp;
}
#endif /* __XFS_DQUOT_H__ */
......@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
logvec->i_len = sizeof(xfs_disk_dquot_t);
logvec->i_type = XLOG_REG_TYPE_DQUOT;
ASSERT(2 == lip->li_desc->lid_size);
qlip->qli_format.qlf_size = 2;
}
......@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
* lock without sleeping, then there must not have been
* anyone in the process of flushing the dquot.
*/
error = xfs_qm_dqflush(dqp, 0);
error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
if (error)
xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
__func__, error, dqp);
......@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
if (!xfs_qm_dqlock_nowait(dqp))
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
if (!xfs_dqflock_nowait(dqp)) {
......
......@@ -437,7 +437,6 @@ xfs_inode_item_format(
* Assert that no attribute-related log flags are set.
*/
if (!XFS_IFORK_Q(ip)) {
ASSERT(nvecs == lip->li_desc->lid_size);
iip->ili_format.ilf_size = nvecs;
ASSERT(!(iip->ili_format.ilf_fields &
(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
......@@ -521,7 +520,6 @@ xfs_inode_item_format(
break;
}
ASSERT(nvecs == lip->li_desc->lid_size);
iip->ili_format.ilf_size = nvecs;
}
......
......@@ -760,38 +760,6 @@ xfs_log_item_init(
INIT_LIST_HEAD(&item->li_cil);
}
/*
* Write region vectors to log. The write happens using the space reservation
* of the ticket (tic). It is not a requirement that all writes for a given
* transaction occur with one call to xfs_log_write(). However, it is important
* to note that the transaction reservation code makes an assumption about the
* number of log headers a transaction requires that may be violated if you
* don't pass all the transaction vectors in one call....
*/
int
xfs_log_write(
struct xfs_mount *mp,
struct xfs_log_iovec reg[],
int nentries,
struct xlog_ticket *tic,
xfs_lsn_t *start_lsn)
{
struct log *log = mp->m_log;
int error;
struct xfs_log_vec vec = {
.lv_niovecs = nentries,
.lv_iovecp = reg,
};
if (XLOG_FORCED_SHUTDOWN(log))
return XFS_ERROR(EIO);
error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
if (error)
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return error;
}
void
xfs_log_move_tail(xfs_mount_t *mp,
xfs_lsn_t tail_lsn)
......@@ -1685,7 +1653,7 @@ xlog_print_tic_res(
};
xfs_warn(mp,
"xfs_log_write: reservation summary:\n"
"xlog_write: reservation summary:\n"
" trans type = %s (%u)\n"
" unit res = %d bytes\n"
" current res = %d bytes\n"
......@@ -1714,7 +1682,7 @@ xlog_print_tic_res(
}
xfs_alert_tag(mp, XFS_PTAG_LOGRES,
"xfs_log_write: reservation ran out. Need to up reservation");
"xlog_write: reservation ran out. Need to up reservation");
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
......@@ -1968,23 +1936,21 @@ xlog_write(
*start_lsn = 0;
len = xlog_write_calc_vec_length(ticket, log_vector);
if (log->l_cilp) {
/*
* Region headers and bytes are already accounted for.
* We only need to take into account start records and
* split regions in this function.
*/
if (ticket->t_flags & XLOG_TIC_INITED)
ticket->t_curr_res -= sizeof(xlog_op_header_t);
/*
* Commit record headers need to be accounted for. These
* come in as separate writes so are easy to detect.
*/
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
} else
ticket->t_curr_res -= len;
/*
* Region headers and bytes are already accounted for.
* We only need to take into account start records and
* split regions in this function.
*/
if (ticket->t_flags & XLOG_TIC_INITED)
ticket->t_curr_res -= sizeof(xlog_op_header_t);
/*
* Commit record headers need to be accounted for. These
* come in as separate writes so are easy to detect.
*/
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
if (ticket->t_curr_res < 0)
xlog_print_tic_res(log->l_mp, ticket);
......@@ -2931,8 +2897,7 @@ _xfs_log_force(
XFS_STATS_INC(xs_log_force);
if (log->l_cilp)
xlog_cil_force(log);
xlog_cil_force(log);
spin_lock(&log->l_icloglock);
......@@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(
XFS_STATS_INC(xs_log_force);
if (log->l_cilp) {
lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
}
lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
try_again:
spin_lock(&log->l_icloglock);
......@@ -3653,7 +3616,7 @@ xfs_log_force_umount(
* completed transactions are flushed to disk with the xfs_log_force()
* call below.
*/
if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
if (!logerror)
xlog_cil_force(log);
/*
......
......@@ -174,11 +174,6 @@ int xfs_log_reserve(struct xfs_mount *mp,
__uint8_t clientid,
uint flags,
uint t_type);
int xfs_log_write(struct xfs_mount *mp,
xfs_log_iovec_t region[],
int nentries,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn);
int xfs_log_unmount_write(struct xfs_mount *mp);
void xfs_log_unmount(struct xfs_mount *mp);
int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
......@@ -189,8 +184,7 @@ void xlog_iodone(struct xfs_buf *);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_log_vec *log_vector,
int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
......
......@@ -32,10 +32,7 @@
#include "xfs_discard.h"
/*
* Perform initial CIL structure initialisation. If the CIL is not
* enabled in this filesystem, ensure the log->l_cilp is null so
* we can check this conditional to determine if we are doing delayed
* logging or not.
* Perform initial CIL structure initialisation.
*/
int
xlog_cil_init(
......@@ -44,10 +41,6 @@ xlog_cil_init(
struct xfs_cil *cil;
struct xfs_cil_ctx *ctx;
log->l_cilp = NULL;
if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
return 0;
cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
if (!cil)
return ENOMEM;
......@@ -80,9 +73,6 @@ void
xlog_cil_destroy(
struct log *log)
{
if (!log->l_cilp)
return;
if (log->l_cilp->xc_ctx) {
if (log->l_cilp->xc_ctx->ticket)
xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
......@@ -137,9 +127,6 @@ void
xlog_cil_init_post_recovery(
struct log *log)
{
if (!log->l_cilp)
return;
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
......@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
* format the regions into the iclog as though they are being formatted
* directly out of the objects themselves.
*/
static void
xlog_cil_format_items(
struct log *log,
struct xfs_log_vec *log_vector)
static struct xfs_log_vec *
xlog_cil_prepare_log_vecs(
struct xfs_trans *tp)
{
struct xfs_log_vec *lv;
struct xfs_log_item_desc *lidp;
struct xfs_log_vec *lv = NULL;
struct xfs_log_vec *ret_lv = NULL;
ASSERT(log_vector);
for (lv = log_vector; lv; lv = lv->lv_next) {
/* Bail out if we didn't find a log item. */
if (list_empty(&tp->t_items)) {
ASSERT(0);
return NULL;
}
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
struct xfs_log_vec *new_lv;
void *ptr;
int index;
int len = 0;
uint niovecs;
/* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue;
/* Skip items that do not have any vectors for writing */
niovecs = IOP_SIZE(lidp->lid_item);
if (!niovecs)
continue;
new_lv = kmem_zalloc(sizeof(*new_lv) +
niovecs * sizeof(struct xfs_log_iovec),
KM_SLEEP);
/* The allocated iovec region lies beyond the log vector. */
new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
new_lv->lv_niovecs = niovecs;
new_lv->lv_item = lidp->lid_item;
/* build the vector array and calculate it's length */
IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
for (index = 0; index < lv->lv_niovecs; index++)
len += lv->lv_iovecp[index].i_len;
IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
for (index = 0; index < new_lv->lv_niovecs; index++)
len += new_lv->lv_iovecp[index].i_len;
lv->lv_buf_len = len;
lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
ptr = lv->lv_buf;
new_lv->lv_buf_len = len;
new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
KM_SLEEP|KM_NOFS);
ptr = new_lv->lv_buf;
for (index = 0; index < lv->lv_niovecs; index++) {
struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
for (index = 0; index < new_lv->lv_niovecs; index++) {
struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
memcpy(ptr, vec->i_addr, vec->i_len);
vec->i_addr = ptr;
ptr += vec->i_len;
}
ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
if (!ret_lv)
ret_lv = new_lv;
else
lv->lv_next = new_lv;
lv = new_lv;
}
return ret_lv;
}
/*
......@@ -635,28 +658,30 @@ xlog_cil_push(
* background commit, returns without it held once background commits are
* allowed again.
*/
void
int
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn,
int flags)
{
struct log *log = mp->m_log;
int log_flags = 0;
int push = 0;
struct xfs_log_vec *log_vector;
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
/*
* do all the hard work of formatting items (including memory
* Do all the hard work of formatting items (including memory
* allocation) outside the CIL context lock. This prevents stalling CIL
* pushes when we are low on memory and a transaction commit spends a
* lot of time in memory reclaim.
*/
xlog_cil_format_items(log, log_vector);
log_vector = xlog_cil_prepare_log_vecs(tp);
if (!log_vector)
return ENOMEM;
/* lock out background commit */
down_read(&log->l_cilp->xc_ctx_lock);
......@@ -709,6 +734,7 @@ xfs_log_commit_cil(
*/
if (push)
xlog_cil_push(log, 0);
return 0;
}
/*
......@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
{
struct xfs_cil_ctx *ctx;
if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
return false;
if (list_empty(&lip->li_cil))
return false;
......
......@@ -219,7 +219,6 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
must be synchronous except
for space allocations */
#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
operations, typically for
......
......@@ -154,12 +154,17 @@ STATIC void
xfs_qm_destroy(
struct xfs_qm *xqm)
{
struct xfs_dquot *dqp, *n;
int hsize, i;
ASSERT(xqm != NULL);
ASSERT(xqm->qm_nrefs == 0);
unregister_shrinker(&xfs_qm_shaker);
mutex_lock(&xqm->qm_dqfrlist_lock);
ASSERT(list_empty(&xqm->qm_dqfrlist));
mutex_unlock(&xqm->qm_dqfrlist_lock);
hsize = xqm->qm_dqhashmask + 1;
for (i = 0; i < hsize; i++) {
xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
......@@ -171,17 +176,6 @@ xfs_qm_destroy(
xqm->qm_grp_dqhtable = NULL;
xqm->qm_dqhashmask = 0;
/* frlist cleanup */
mutex_lock(&xqm->qm_dqfrlist_lock);
list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
xfs_dqlock(dqp);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
xfs_dqunlock(dqp);
xfs_qm_dqdestroy(dqp);
}
mutex_unlock(&xqm->qm_dqfrlist_lock);
mutex_destroy(&xqm->qm_dqfrlist_lock);
kmem_free(xqm);
}
......@@ -232,33 +226,9 @@ STATIC void
xfs_qm_rele_quotafs_ref(
struct xfs_mount *mp)
{
xfs_dquot_t *dqp, *n;
ASSERT(xfs_Gqm);
ASSERT(xfs_Gqm->qm_nrefs > 0);
/*
* Go thru the freelist and destroy all inactive dquots.
*/
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_INACTIVE) {
ASSERT(dqp->q_mount == NULL);
ASSERT(! XFS_DQ_IS_DIRTY(dqp));
ASSERT(list_empty(&dqp->q_hashlist));
ASSERT(list_empty(&dqp->q_mplist));
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
xfs_dqunlock(dqp);
xfs_qm_dqdestroy(dqp);
} else {
xfs_dqunlock(dqp);
}
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
/*
* Destroy the entire XQM. If somebody mounts with quotaon, this'll
* be restarted.
......@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
*/
STATIC int
xfs_qm_dqflush_all(
struct xfs_mount *mp,
int sync_mode)
struct xfs_mount *mp)
{
struct xfs_quotainfo *q = mp->m_quotainfo;
int recl;
......@@ -429,7 +398,8 @@ xfs_qm_dqflush_all(
mutex_lock(&q->qi_dqlist_lock);
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
if (! XFS_DQ_IS_DIRTY(dqp)) {
if ((dqp->dq_flags & XFS_DQ_FREEING) ||
!XFS_DQ_IS_DIRTY(dqp)) {
xfs_dqunlock(dqp);
continue;
}
......@@ -444,14 +414,14 @@ xfs_qm_dqflush_all(
* out immediately. We'll be able to acquire
* the flush lock when the I/O completes.
*/
xfs_qm_dqflock_pushbuf_wait(dqp);
xfs_dqflock_pushbuf_wait(dqp);
}
/*
* Let go of the mplist lock. We don't want to hold it
* across a disk write.
*/
mutex_unlock(&q->qi_dqlist_lock);
error = xfs_qm_dqflush(dqp, sync_mode);
error = xfs_qm_dqflush(dqp, 0);
xfs_dqunlock(dqp);
if (error)
return error;
......@@ -468,6 +438,7 @@ xfs_qm_dqflush_all(
/* return ! busy */
return 0;
}
/*
* Release the group dquot pointers the user dquots may be
* carrying around as a hint. mplist is locked on entry and exit.
......@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
{
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dquot *dqp, *gdqp;
int nrecl;
again:
ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
xfs_dqlock(dqp);
if ((gdqp = dqp->q_gdquot)) {
xfs_dqlock(gdqp);
dqp->q_gdquot = NULL;
}
xfs_dqunlock(dqp);
if (gdqp) {
/*
* Can't hold the mplist lock across a dqput.
* XXXmust convert to marker based iterations here.
*/
nrecl = q->qi_dqreclaims;
if (dqp->dq_flags & XFS_DQ_FREEING) {
xfs_dqunlock(dqp);
mutex_unlock(&q->qi_dqlist_lock);
xfs_qm_dqput(gdqp);
delay(1);
mutex_lock(&q->qi_dqlist_lock);
if (nrecl != q->qi_dqreclaims)
goto again;
goto again;
}
gdqp = dqp->q_gdquot;
if (gdqp)
dqp->q_gdquot = NULL;
xfs_dqunlock(dqp);
if (gdqp)
xfs_qm_dqrele(gdqp);
}
}
......@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_dquot *dqp, *n;
uint dqtype;
int nrecl;
int nmisses;
int nmisses = 0;
LIST_HEAD (dispose_list);
if (!q)
return 0;
......@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
*/
xfs_qm_detach_gdquots(mp);
again:
nmisses = 0;
ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
/*
* Try to get rid of all of the unwanted dquots. The idea is to
* get them off mplist and hashlist, but leave them on freelist.
* Try to get rid of all of the unwanted dquots.
*/
list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
/*
* It's OK to look at the type without taking dqlock here.
* We're holding the mplist lock here, and that's needed for
* a dqreclaim.
*/
if ((dqp->dq_flags & dqtype) == 0)
continue;
if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
nrecl = q->qi_dqreclaims;
mutex_unlock(&q->qi_dqlist_lock);
mutex_lock(&dqp->q_hash->qh_lock);
mutex_lock(&q->qi_dqlist_lock);
/*
* XXXTheoretically, we can get into a very long
* ping pong game here.
* No one can be adding dquots to the mplist at
* this point, but somebody might be taking things off.
*/
if (nrecl != q->qi_dqreclaims) {
mutex_unlock(&dqp->q_hash->qh_lock);
goto again;
}
xfs_dqlock(dqp);
if ((dqp->dq_flags & dqtype) != 0 &&
!(dqp->dq_flags & XFS_DQ_FREEING)) {
if (dqp->q_nrefs == 0) {
dqp->dq_flags |= XFS_DQ_FREEING;
list_move_tail(&dqp->q_mplist, &dispose_list);
} else
nmisses++;
}
/*
* Take the dquot off the mplist and hashlist. It may remain on
* freelist in INACTIVE state.
*/
nmisses += xfs_qm_dqpurge(dqp);
xfs_dqunlock(dqp);
}
mutex_unlock(&q->qi_dqlist_lock);
list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
xfs_qm_dqpurge(dqp);
return nmisses;
}
......@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
*/
dqp = udqhint->q_gdquot;
if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
xfs_dqlock(dqp);
XFS_DQHOLD(dqp);
ASSERT(*IO_idqpp == NULL);
*IO_idqpp = dqp;
xfs_dqunlock(dqp);
*IO_idqpp = xfs_qm_dqhold(dqp);
xfs_dqunlock(udqhint);
return 0;
}
......@@ -693,11 +635,7 @@ xfs_qm_dqattach_one(
/*
* Given a udquot and gdquot, attach a ptr to the group dquot in the
* udquot as a hint for future lookups. The idea sounds simple, but the
* execution isn't, because the udquot might have a group dquot attached
* already and getting rid of that gets us into lock ordering constraints.
* The process is complicated more by the fact that the dquots may or may not
* be locked on entry.
* udquot as a hint for future lookups.
*/
STATIC void
xfs_qm_dqattach_grouphint(
......@@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(
xfs_dqlock(udq);
if ((tmp = udq->q_gdquot)) {
if (tmp == gdq) {
xfs_dqunlock(udq);
return;
}
tmp = udq->q_gdquot;
if (tmp) {
if (tmp == gdq)
goto done;
udq->q_gdquot = NULL;
/*
* We can't keep any dqlocks when calling dqrele,
* because the freelist lock comes before dqlocks.
*/
xfs_dqunlock(udq);
/*
* we took a hard reference once upon a time in dqget,
* so give it back when the udquot no longer points at it
* dqput() does the unlocking of the dquot.
*/
xfs_qm_dqrele(tmp);
xfs_dqlock(udq);
xfs_dqlock(gdq);
} else {
ASSERT(XFS_DQ_IS_LOCKED(udq));
xfs_dqlock(gdq);
}
ASSERT(XFS_DQ_IS_LOCKED(udq));
ASSERT(XFS_DQ_IS_LOCKED(gdq));
/*
* Somebody could have attached a gdquot here,
* when we dropped the uqlock. If so, just do nothing.
*/
if (udq->q_gdquot == NULL) {
XFS_DQHOLD(gdq);
udq->q_gdquot = gdq;
}
xfs_dqunlock(gdq);
udq->q_gdquot = xfs_qm_dqhold(gdq);
done:
xfs_dqunlock(udq);
}
......@@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(
ASSERT(ip->i_gdquot);
/*
* We may or may not have the i_udquot locked at this point,
* but this check is OK since we don't depend on the i_gdquot to
* be accurate 100% all the time. It is just a hint, and this
* will succeed in general.
*/
if (ip->i_udquot->q_gdquot == ip->i_gdquot)
goto done;
/*
* Attach i_gdquot to the gdquot hint inside the i_udquot.
* We do not have i_udquot locked at this point, but this check
* is OK since we don't depend on the i_gdquot to be accurate
* 100% all the time. It is just a hint, and this will
* succeed in general.
*/
xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
if (ip->i_udquot->q_gdquot != ip->i_gdquot)
xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
}
done:
......@@ -879,100 +785,6 @@ xfs_qm_dqdetach(
}
}
int
xfs_qm_sync(
struct xfs_mount *mp,
int flags)
{
struct xfs_quotainfo *q = mp->m_quotainfo;
int recl, restarts;
struct xfs_dquot *dqp;
int error;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
restarts = 0;
again:
mutex_lock(&q->qi_dqlist_lock);
/*
* dqpurge_all() also takes the mplist lock and iterate thru all dquots
* in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
* when we have the mplist lock, we know that dquots will be consistent
* as long as we have it locked.
*/
if (!XFS_IS_QUOTA_ON(mp)) {
mutex_unlock(&q->qi_dqlist_lock);
return 0;
}
ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
/*
* If this is vfs_sync calling, then skip the dquots that
* don't 'seem' to be dirty. ie. don't acquire dqlock.
* This is very similar to what xfs_sync does with inodes.
*/
if (flags & SYNC_TRYLOCK) {
if (!XFS_DQ_IS_DIRTY(dqp))
continue;
if (!xfs_qm_dqlock_nowait(dqp))
continue;
} else {
xfs_dqlock(dqp);
}
/*
* Now, find out for sure if this dquot is dirty or not.
*/
if (! XFS_DQ_IS_DIRTY(dqp)) {
xfs_dqunlock(dqp);
continue;
}
/* XXX a sentinel would be better */
recl = q->qi_dqreclaims;
if (!xfs_dqflock_nowait(dqp)) {
if (flags & SYNC_TRYLOCK) {
xfs_dqunlock(dqp);
continue;
}
/*
* If we can't grab the flush lock then if the caller
* really wanted us to give this our best shot, so
* see if we can give a push to the buffer before we wait
* on the flush lock. At this point, we know that
* even though the dquot is being flushed,
* it has (new) dirty data.
*/
xfs_qm_dqflock_pushbuf_wait(dqp);
}
/*
* Let go of the mplist lock. We don't want to hold it
* across a disk write
*/
mutex_unlock(&q->qi_dqlist_lock);
error = xfs_qm_dqflush(dqp, flags);
xfs_dqunlock(dqp);
if (error && XFS_FORCED_SHUTDOWN(mp))
return 0; /* Need to prevent umount failure */
else if (error)
return error;
mutex_lock(&q->qi_dqlist_lock);
if (recl != q->qi_dqreclaims) {
if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
break;
mutex_unlock(&q->qi_dqlist_lock);
goto again;
}
}
mutex_unlock(&q->qi_dqlist_lock);
return 0;
}
/*
* The hash chains and the mplist use the same xfs_dqhash structure as
* their list head, but we can take the mplist qh_lock and one of the
......@@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(
/*
* We try to get the limits from the superuser's limits fields.
* This is quite hacky, but it is standard quota practice.
*
* We look at the USR dquot with id == 0 first, but if user quotas
* are not enabled we goto the GRP dquot with id == 0.
* We don't really care to keep separate default limits for user
* and group quotas, at least not at this point.
*
* Since we may not have done a quotacheck by this point, just read
* the dquot without attaching it to any hashtables or lists.
*/
error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
(XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
XFS_DQ_PROJ),
XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
&dqp);
if (! error) {
error = xfs_qm_dqread(mp, 0,
XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
(XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
XFS_DQ_PROJ),
XFS_QMOPT_DOWARN, &dqp);
if (!error) {
xfs_disk_dquot_t *ddqp = &dqp->q_core;
/*
......@@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(
qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
/*
* We sent the XFS_QMOPT_DQSUSER flag to dqget because
* we don't want this dquot cached. We haven't done a
* quotacheck yet, and quotacheck doesn't like incore dquots.
*/
xfs_qm_dqdestroy(dqp);
} else {
qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
......@@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(
* successfully.
*/
if (!error)
error = xfs_qm_dqflush_all(mp, 0);
error = xfs_qm_dqflush_all(mp);
/*
* We can get this error if we couldn't do a dquot allocation inside
......@@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(
/*
* Just pop the least recently used dquot off the freelist and
* recycle it. The returned dquot is locked.
* Pop the least recently used dquot off the freelist and recycle it.
*/
STATIC xfs_dquot_t *
STATIC struct xfs_dquot *
xfs_qm_dqreclaim_one(void)
{
xfs_dquot_t *dqpout;
xfs_dquot_t *dqp;
int restarts;
int startagain;
restarts = 0;
dqpout = NULL;
struct xfs_dquot *dqp;
int restarts = 0;
/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
again:
startagain = 0;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
restart:
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
struct xfs_mount *mp = dqp->q_mount;
xfs_dqlock(dqp);
if (!xfs_dqlock_nowait(dqp))
continue;
/*
* We are racing with dqlookup here. Naturally we don't
* want to reclaim a dquot that lookup wants. We release the
* freelist lock and start over, so that lookup will grab
* both the dquot and the freelistlock.
* This dquot has already been grabbed by dqlookup.
* Remove it from the freelist and try again.
*/
if (dqp->dq_flags & XFS_DQ_WANT) {
ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
if (dqp->q_nrefs) {
trace_xfs_dqreclaim_want(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqwants);
restarts++;
startagain = 1;
goto dqunlock;
}
/*
* If the dquot is inactive, we are assured that it is
* not on the mplist or the hashlist, and that makes our
* life easier.
*/
if (dqp->dq_flags & XFS_DQ_INACTIVE) {
ASSERT(mp == NULL);
ASSERT(! XFS_DQ_IS_DIRTY(dqp));
ASSERT(list_empty(&dqp->q_hashlist));
ASSERT(list_empty(&dqp->q_mplist));
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
dqpout = dqp;
XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
restarts++;
goto dqunlock;
}
......@@ -1874,64 +1658,49 @@ xfs_qm_dqreclaim_one(void)
* We flush it delayed write, so don't bother
* releasing the freelist lock.
*/
error = xfs_qm_dqflush(dqp, 0);
error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
if (error) {
xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp);
}
goto dqunlock;
}
xfs_dqfunlock(dqp);
/*
* We're trying to get the hashlock out of order. This races
* with dqlookup; so, we giveup and goto the next dquot if
* we couldn't get the hashlock. This way, we won't starve
* a dqlookup process that holds the hashlock that is
* waiting for the freelist lock.
* Prevent lookup now that we are going to reclaim the dquot.
* Once XFS_DQ_FREEING is set lookup won't touch the dquot,
* thus we can drop the lock now.
*/
if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
restarts++;
goto dqfunlock;
}
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqunlock(dqp);
/*
* This races with dquot allocation code as well as dqflush_all
* and reclaim code. So, if we failed to grab the mplist lock,
* giveup everything and start over.
*/
if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
restarts++;
startagain = 1;
goto qhunlock;
}
mutex_lock(&dqp->q_hash->qh_lock);
list_del_init(&dqp->q_hashlist);
dqp->q_hash->qh_version++;
mutex_unlock(&dqp->q_hash->qh_lock);
ASSERT(dqp->q_nrefs == 0);
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dquots--;
mp->m_quotainfo->qi_dqreclaims++;
list_del_init(&dqp->q_hashlist);
dqp->q_hash->qh_version++;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
ASSERT(dqp->q_nrefs == 0);
list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--;
dqpout = dqp;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
qhunlock:
mutex_unlock(&dqp->q_hash->qh_lock);
dqfunlock:
xfs_dqfunlock(dqp);
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
return dqp;
dqunlock:
xfs_dqunlock(dqp);
if (dqpout)
break;
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
break;
if (startagain) {
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
goto again;
}
goto restart;
}
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
return dqpout;
return NULL;
}
/*
......@@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(
* this to caller
*/
ASSERT(ip->i_udquot);
uq = ip->i_udquot;
xfs_dqlock(uq);
XFS_DQHOLD(uq);
xfs_dqunlock(uq);
uq = xfs_qm_dqhold(ip->i_udquot);
}
}
if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
......@@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(
xfs_ilock(ip, lockflags);
} else {
ASSERT(ip->i_gdquot);
gq = ip->i_gdquot;
xfs_dqlock(gq);
XFS_DQHOLD(gq);
xfs_dqunlock(gq);
gq = xfs_qm_dqhold(ip->i_gdquot);
}
} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
if (xfs_get_projid(ip) != prid) {
......@@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(
xfs_ilock(ip, lockflags);
} else {
ASSERT(ip->i_gdquot);
gq = ip->i_gdquot;
xfs_dqlock(gq);
XFS_DQHOLD(gq);
xfs_dqunlock(gq);
gq = xfs_qm_dqhold(ip->i_gdquot);
}
}
if (uq)
......@@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(
xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
* Take an extra reference, because the inode
* is going to keep this dquot pointer even
* after the trans_commit.
* Take an extra reference, because the inode is going to keep
* this dquot pointer even after the trans_commit.
*/
xfs_dqlock(newdq);
XFS_DQHOLD(newdq);
xfs_dqunlock(newdq);
*IO_olddq = newdq;
*IO_olddq = xfs_qm_dqhold(newdq);
return prevdq;
}
......@@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp) {
xfs_dqlock(udqp);
XFS_DQHOLD(udqp);
xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
ip->i_udquot = udqp;
ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
ip->i_udquot = xfs_qm_dqhold(udqp);
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
if (gdqp) {
xfs_dqlock(gdqp);
XFS_DQHOLD(gdqp);
xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
ip->i_gdquot = gdqp;
ASSERT(XFS_IS_OQUOTA_ON(mp));
ASSERT((XFS_IS_GQUOTA_ON(mp) ?
ip->i_d.di_gid : xfs_get_projid(ip)) ==
be32_to_cpu(gdqp->q_core.d_id));
ip->i_gdquot = xfs_qm_dqhold(gdqp);
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
}
......
......@@ -32,12 +32,6 @@ extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone;
/*
* Used in xfs_qm_sync called by xfs_sync to count the max times that it can
* iterate over the mountpt's dquot list in one call.
*/
#define XFS_QM_SYNC_MAX_RESTARTS 7
/*
* Ditto, for xfs_qm_dqreclaim_one.
*/
......
......@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
#define XFS_DQ_PROJ 0x0002 /* project quota */
#define XFS_DQ_GROUP 0x0004 /* a group quota */
#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */
#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
......@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
{ XFS_DQ_PROJ, "PROJ" }, \
{ XFS_DQ_GROUP, "GROUP" }, \
{ XFS_DQ_DIRTY, "DIRTY" }, \
{ XFS_DQ_WANT, "WANT" }, \
{ XFS_DQ_INACTIVE, "INACTIVE" }
{ XFS_DQ_FREEING, "FREEING" }
/*
* In the worst case, when both user and group quotas are on,
......@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
......@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
extern void xfs_qm_dqdetach(struct xfs_inode *);
extern void xfs_qm_dqrele(struct xfs_dquot *);
extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
extern int xfs_qm_sync(struct xfs_mount *, int);
extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
extern void xfs_qm_mount_quotas(struct xfs_mount *);
extern void xfs_qm_unmount(struct xfs_mount *);
......@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
#define xfs_qm_dqdetach(ip)
#define xfs_qm_dqrele(d)
#define xfs_qm_statvfs(ip, s)
static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
{
return 0;
}
#define xfs_qm_newmount(mp, a, b) (0)
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
......
......@@ -199,7 +199,6 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_BARRIER;
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
mp->m_flags |= XFS_MOUNT_DELAYLOG;
/*
* These can be overridden by the mount option parsing.
......@@ -353,11 +352,11 @@ xfs_parseargs(
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
xfs_warn(mp,
"delaylog is the default now, option is deprecated.");
} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
xfs_warn(mp,
"nodelaylog is deprecated and will be removed in Linux 3.3");
"nodelaylog support has been removed, option is deprecated.");
} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
mp->m_flags |= XFS_MOUNT_DISCARD;
} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
......@@ -395,13 +394,6 @@ xfs_parseargs(
return EINVAL;
}
if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
!(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
xfs_warn(mp,
"the discard option is incompatible with the nodelaylog option");
return EINVAL;
}
#ifndef CONFIG_XFS_QUOTA
if (XFS_IS_QUOTA_RUNNING(mp)) {
xfs_warn(mp, "quota support not available in this kernel.");
......@@ -501,7 +493,6 @@ xfs_showargs(
{ XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
{ XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
{ 0, NULL }
};
......@@ -1014,17 +1005,10 @@ xfs_fs_sync_fs(
int error;
/*
* Not much we can do for the first async pass. Writing out the
* superblock would be counter-productive as we are going to redirty
* when writing out other data and metadata (and writing out a single
* block is quite fast anyway).
*
* Try to asynchronously kick off quota syncing at least.
* Doing anything during the async pass would be counterproductive.
*/
if (!wait) {
xfs_qm_sync(mp, SYNC_TRYLOCK);
if (!wait)
return 0;
}
error = xfs_quiesce_data(mp);
if (error)
......@@ -1621,12 +1605,12 @@ STATIC int __init
xfs_init_workqueues(void)
{
/*
* max_active is set to 8 to give enough concurency to allow
* multiple work operations on each CPU to run. This allows multiple
* filesystems to be running sync work concurrently, and scales with
* the number of CPUs in the system.
* We never want to the same work item to run twice, reclaiming inodes
* or idling the log is not going to get any faster by multiple CPUs
* competing for ressources. Use the default large max_active value
* so that even lots of filesystems can perform these task in parallel.
*/
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
if (!xfs_syncd_wq)
return -ENOMEM;
return 0;
......
......@@ -395,10 +395,7 @@ xfs_quiesce_data(
*/
xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_qm_sync(mp, SYNC_WAIT);
/* force out the newly dirtied log buffers */
/* force out the log */
xfs_log_force(mp, XFS_LOG_SYNC);
/* write superblock and hoover up shutdown errors */
......@@ -506,7 +503,6 @@ xfs_sync_worker(
error = xfs_fs_log_dummy(mp);
else
xfs_log_force(mp, 0);
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
/* start pushing all the metadata that is currently dirty */
xfs_ail_push_all(mp->m_ail);
......
......@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
......
......@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
lidp->lid_item = lip;
lidp->lid_flags = 0;
lidp->lid_size = 0;
list_add_tail(&lidp->lid_trans, &tp->t_items);
lip->li_desc = lidp;
......@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
}
}
/*
* Unlock the items associated with a transaction.
*
* Items which were not logged should be freed. Those which were logged must
* still be tracked so they can be unpinned when the transaction commits.
*/
STATIC void
xfs_trans_unlock_items(
struct xfs_trans *tp,
xfs_lsn_t commit_lsn)
{
struct xfs_log_item_desc *lidp, *next;
list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
struct xfs_log_item *lip = lidp->lid_item;
lip->li_desc = NULL;
if (commit_lsn != NULLCOMMITLSN)
IOP_COMMITTING(lip, commit_lsn);
IOP_UNLOCK(lip);
/*
* Free the descriptor if the item is not dirty
* within this transaction.
*/
if (!(lidp->lid_flags & XFS_LID_DIRTY))
xfs_trans_free_item_desc(lidp);
}
}
/*
* Total up the number of log iovecs needed to commit this
* transaction. The transaction itself needs one for the
* transaction header. Ask each dirty item in turn how many
* it needs to get the total.
*/
static uint
xfs_trans_count_vecs(
struct xfs_trans *tp)
{
int nvecs;
struct xfs_log_item_desc *lidp;
nvecs = 1;
/* In the non-debug case we need to start bailing out if we
* didn't find a log_item here, return zero and let trans_commit
* deal with it.
*/
if (list_empty(&tp->t_items)) {
ASSERT(0);
return 0;
}
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
/*
* Skip items which aren't dirty in this transaction.
*/
if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue;
lidp->lid_size = IOP_SIZE(lidp->lid_item);
nvecs += lidp->lid_size;
}
return nvecs;
}
/*
* Fill in the vector with pointers to data to be logged
* by this transaction. The transaction header takes
* the first vector, and then each dirty item takes the
* number of vectors it indicated it needed in xfs_trans_count_vecs().
*
* As each item fills in the entries it needs, also pin the item
* so that it cannot be flushed out until the log write completes.
*/
static void
xfs_trans_fill_vecs(
struct xfs_trans *tp,
struct xfs_log_iovec *log_vector)
{
struct xfs_log_item_desc *lidp;
struct xfs_log_iovec *vecp;
uint nitems;
/*
* Skip over the entry for the transaction header, we'll
* fill that in at the end.
*/
vecp = log_vector + 1;
nitems = 0;
ASSERT(!list_empty(&tp->t_items));
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
/* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue;
/*
* The item may be marked dirty but not log anything. This can
* be used to get called when a transaction is committed.
*/
if (lidp->lid_size)
nitems++;
IOP_FORMAT(lidp->lid_item, vecp);
vecp += lidp->lid_size;
IOP_PIN(lidp->lid_item);
}
/*
* Now that we've counted the number of items in this transaction, fill
* in the transaction header. Note that the transaction header does not
* have a log item.
*/
tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
tp->t_header.th_type = tp->t_type;
tp->t_header.th_num_items = nitems;
log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
log_vector->i_len = sizeof(xfs_trans_header_t);
log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
}
/*
* The committed item processing consists of calling the committed routine of
* each logged item, updating the item's position in the AIL if necessary, and
* unpinning each item. If the committed routine returns -1, then do nothing
* further with the item because it may have been freed.
*
* Since items are unlocked when they are copied to the incore log, it is
* possible for two transactions to be completing and manipulating the same
* item simultaneously. The AIL lock will protect the lsn field of each item.
* The value of this field can never go backwards.
*
* We unpin the items after repositioning them in the AIL, because otherwise
* they could be immediately flushed and we'd have to race with the flusher
* trying to pull the item from the AIL as we add it.
*/
static void
xfs_trans_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t commit_lsn,
int aborted)
{
xfs_lsn_t item_lsn;
struct xfs_ail *ailp;
if (aborted)
lip->li_flags |= XFS_LI_ABORTED;
item_lsn = IOP_COMMITTED(lip, commit_lsn);
/* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
return;
/*
* If the returned lsn is greater than what it contained before, update
* the location of the item in the AIL. If it is not, then do nothing.
* Items can never move backwards in the AIL.
*
* While the new lsn should usually be greater, it is possible that a
* later transaction completing simultaneously with an earlier one
* using the same item could complete first with a higher lsn. This
* would cause the earlier transaction to fail the test below.
*/
ailp = lip->li_ailp;
spin_lock(&ailp->xa_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
/*
* This will set the item's lsn to item_lsn and update the
* position of the item in the AIL.
*
* xfs_trans_ail_update() drops the AIL lock.
*/
xfs_trans_ail_update(ailp, lip, item_lsn);
} else {
spin_unlock(&ailp->xa_lock);
}
/*
* Now that we've repositioned the item in the AIL, unpin it so it can
* be flushed. Pass information about buffer stale state down from the
* log item flags, if anyone else stales the buffer we do not want to
* pay any attention to it.
*/
IOP_UNPIN(lip, 0);
}
/*
* This is typically called by the LM when a transaction has been fully
* committed to disk. It needs to unpin the items which have
* been logged by the transaction and update their positions
* in the AIL if necessary.
*
* This also gets called when the transactions didn't get written out
* because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
*/
STATIC void
xfs_trans_committed(
void *arg,
int abortflag)
{
struct xfs_trans *tp = arg;
struct xfs_log_item_desc *lidp, *next;
list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
xfs_trans_free_item_desc(lidp);
}
xfs_trans_free(tp);
}
static inline void
xfs_log_item_batch_insert(
struct xfs_ail *ailp,
......@@ -1537,258 +1323,6 @@ xfs_trans_committed_bulk(
spin_unlock(&ailp->xa_lock);
}
/*
* Called from the trans_commit code when we notice that the filesystem is in
* the middle of a forced shutdown.
*
* When we are called here, we have already pinned all the items in the
* transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
* so we can simply walk the items in the transaction, unpin them with an abort
* flag and then free the items. Note that unpinning the items can result in
* them being freed immediately, so we need to use a safe list traversal method
* here.
*/
STATIC void
xfs_trans_uncommit(
struct xfs_trans *tp,
uint flags)
{
struct xfs_log_item_desc *lidp, *n;
list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
if (lidp->lid_flags & XFS_LID_DIRTY)
IOP_UNPIN(lidp->lid_item, 1);
}
xfs_trans_unreserve_and_mod_sb(tp);
xfs_trans_unreserve_and_mod_dquots(tp);
xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
xfs_trans_free(tp);
}
/*
* Format the transaction direct to the iclog. This isolates the physical
* transaction commit operation from the logical operation and hence allows
* other methods to be introduced without affecting the existing commit path.
*/
static int
xfs_trans_commit_iclog(
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_lsn_t *commit_lsn,
int flags)
{
int shutdown;
int error;
int log_flags = 0;
struct xlog_in_core *commit_iclog;
#define XFS_TRANS_LOGVEC_COUNT 16
struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
struct xfs_log_iovec *log_vector;
uint nvec;
/*
* Ask each log item how many log_vector entries it will
* need so we can figure out how many to allocate.
* Try to avoid the kmem_alloc() call in the common case
* by using a vector from the stack when it fits.
*/
nvec = xfs_trans_count_vecs(tp);
if (nvec == 0) {
return ENOMEM; /* triggers a shutdown! */
} else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
log_vector = log_vector_fast;
} else {
log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
sizeof(xfs_log_iovec_t),
KM_SLEEP);
}
/*
* Fill in the log_vector and pin the logged items, and
* then write the transaction to the log.
*/
xfs_trans_fill_vecs(tp, log_vector);
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
/*
* The transaction is committed incore here, and can go out to disk
* at any time after this call. However, all the items associated
* with the transaction are still locked and pinned in memory.
*/
*commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
tp->t_commit_lsn = *commit_lsn;
trace_xfs_trans_commit_lsn(tp);
if (nvec > XFS_TRANS_LOGVEC_COUNT)
kmem_free(log_vector);
/*
* If we got a log write error. Unpin the logitems that we
* had pinned, clean up, free trans structure, and return error.
*/
if (error || *commit_lsn == -1) {
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
return XFS_ERROR(EIO);
}
/*
* Once the transaction has committed, unused
* reservations need to be released and changes to
* the superblock need to be reflected in the in-core
* version. Do that now.
*/
xfs_trans_unreserve_and_mod_sb(tp);
/*
* Tell the LM to call the transaction completion routine
* when the log write with LSN commit_lsn completes (e.g.
* when the transaction commit really hits the on-disk log).
* After this call we cannot reference tp, because the call
* can happen at any time and the call will free the transaction
* structure pointed to by tp. The only case where we call
* the completion routine (xfs_trans_committed) directly is
* if the log is turned off on a debug kernel or we're
* running in simulation mode (the log is explicitly turned
* off).
*/
tp->t_logcb.cb_func = xfs_trans_committed;
tp->t_logcb.cb_arg = tp;
/*
* We need to pass the iclog buffer which was used for the
* transaction commit record into this function, and attach
* the callback to it. The callback must be attached before
* the items are unlocked to avoid racing with other threads
* waiting for an item to unlock.
*/
shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
/*
* Mark this thread as no longer being in a transaction
*/
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
/*
* Once all the items of the transaction have been copied
* to the in core log and the callback is attached, the
* items can be unlocked.
*
* This will free descriptors pointing to items which were
* not logged since there is nothing more to do with them.
* For items which were logged, we will keep pointers to them
* so they can be unpinned after the transaction commits to disk.
* This will also stamp each modified meta-data item with
* the commit lsn of this transaction for dependency tracking
* purposes.
*/
xfs_trans_unlock_items(tp, *commit_lsn);
/*
* If we detected a log error earlier, finish committing
* the transaction now (unpin log items, etc).
*
* Order is critical here, to avoid using the transaction
* pointer after its been freed (by xfs_trans_committed
* either here now, or as a callback). We cannot do this
* step inside xfs_log_notify as was done earlier because
* of this issue.
*/
if (shutdown)
xfs_trans_committed(tp, XFS_LI_ABORTED);
/*
* Now that the xfs_trans_committed callback has been attached,
* and the items are released we can finally allow the iclog to
* go to disk.
*/
return xfs_log_release_iclog(mp, commit_iclog);
}
/*
* Walk the log items and allocate log vector structures for
* each item large enough to fit all the vectors they require.
* Note that this format differs from the old log vector format in
* that there is no transaction header in these log vectors.
*/
STATIC struct xfs_log_vec *
xfs_trans_alloc_log_vecs(
xfs_trans_t *tp)
{
struct xfs_log_item_desc *lidp;
struct xfs_log_vec *lv = NULL;
struct xfs_log_vec *ret_lv = NULL;
/* Bail out if we didn't find a log item. */
if (list_empty(&tp->t_items)) {
ASSERT(0);
return NULL;
}
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
struct xfs_log_vec *new_lv;
/* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue;
/* Skip items that do not have any vectors for writing */
lidp->lid_size = IOP_SIZE(lidp->lid_item);
if (!lidp->lid_size)
continue;
new_lv = kmem_zalloc(sizeof(*new_lv) +
lidp->lid_size * sizeof(struct xfs_log_iovec),
KM_SLEEP);
/* The allocated iovec region lies beyond the log vector. */
new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
new_lv->lv_niovecs = lidp->lid_size;
new_lv->lv_item = lidp->lid_item;
if (!ret_lv)
ret_lv = new_lv;
else
lv->lv_next = new_lv;
lv = new_lv;
}
return ret_lv;
}
static int
xfs_trans_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_lsn_t *commit_lsn,
int flags)
{
struct xfs_log_vec *log_vector;
/*
* Get each log item to allocate a vector structure for
* the log item to to pass to the log write code. The
* CIL commit code will format the vector and save it away.
*/
log_vector = xfs_trans_alloc_log_vecs(tp);
if (!log_vector)
return ENOMEM;
xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
return 0;
}
/*
* Commit the given transaction to the log.
*
......@@ -1845,17 +1379,16 @@ xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
if (mp->m_flags & XFS_MOUNT_DELAYLOG)
error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
else
error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
if (error == ENOMEM) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
error = XFS_ERROR(EIO);
goto out_unreserve;
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
/*
* If the transaction needs to be synchronous, then force the
* log out now and wait for it.
......
......@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
*/
struct xfs_log_item_desc {
struct xfs_log_item *lid_item;
ushort lid_size;
unsigned char lid_flags;
struct list_head lid_trans;
unsigned char lid_flags;
};
#define XFS_LID_DIRTY 0x1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册