Commit 1be44e23 authored by Linus Torvalds

Merge tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "This is a little larger than I'd like late in the release cycle, but
  all the fixes are for regressions introduced in the 4.1-rc1 merge, or
  are needed back in -stable kernels fairly quickly as they are
  filesystem corruption or userspace visible correctness issues.

  Changes in this update:

   - regression fix for new rename whiteout code

   - regression fixes for new superblock generic per-cpu counter code

   - fix for incorrect error return sign introduced in 3.17

   - metadata corruption fixes that need to go back to -stable kernels"

* tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: fix broken i_nlink accounting for whiteout tmpfile inode
  xfs: xfs_iozero can return positive errno
  xfs: xfs_attr_inactive leaves inconsistent attr fork state behind
  xfs: extent size hints can round up extents past MAXEXTLEN
  xfs: inode and free block counters need to use __percpu_counter_compare
  percpu_counter: batch size aware __percpu_counter_compare()
  xfs: use percpu_counter_read_positive for mp->m_icount
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
  * After the last attribute is removed revert to original inode format,
  * making all literal area available to the data fork once more.
  */
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp)
 {
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
 	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 	} else {
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
 	if (forkoff == -1) {
 		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
 		ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 		goto out;
 	}
...
@@ -53,7 +53,7 @@ int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void	xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 /*
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
...
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
 		align_alen += temp;
 		align_off -= temp;
 	}
+
+	/* Same adjustment for the end of the requested area. */
+	temp = (align_alen % extsz);
+	if (temp)
+		align_alen += extsz - temp;
+
 	/*
-	 * Same adjustment for the end of the requested area.
+	 * For large extent hint sizes, the aligned extent might be larger than
+	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+	 * the length back under MAXEXTLEN. The outer allocation loops handle
+	 * short allocation just fine, so it is safe to do this. We only want to
+	 * do it when we are forced to, though, because it means more allocation
+	 * operations are required.
 	 */
-	temp = (align_alen % extsz);
-	if (temp)
-		align_alen += extsz - temp;
+	while (align_alen > MAXEXTLEN)
+		align_alen -= extsz;
+	ASSERT(align_alen <= MAXEXTLEN);
+
 	/*
 	 * If the previous block overlaps with this proposed allocation
 	 * then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
 		return -EINVAL;
 	} else {
 		ASSERT(orig_off >= align_off);
-		ASSERT(orig_end <= align_off + align_alen);
+		/* see MAXEXTLEN handling above */
+		ASSERT(orig_end <= align_off + align_alen ||
+		       align_alen + extsz > MAXEXTLEN);
 	}

 #ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
 	/* Figure out the extent size, adjust alen */
 	extsz = xfs_get_extsz_hint(ip);
 	if (extsz) {
-		/*
-		 * Make sure we don't exceed a single extent length when we
-		 * align the extent by reducing length we are going to
-		 * allocate by the maximum amount extent size aligment may
-		 * require.
-		 */
-		alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
 		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
 					       1, 0, &aoff, &alen);
 		ASSERT(!error);
...
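To see why the while loop is needed, consider the arithmetic in the first hunk above: rounding the length up to a multiple of extsz can overshoot the 21-bit on-disk extent length limit, and subtracting whole extsz units pulls it back under while keeping alignment. Below is a minimal userspace sketch of that logic; the MAXEXTLEN value matches the XFS definition, but the standalone harness and the sample extsz are assumptions for illustration, not kernel code.

#include <assert.h>
#include <stdio.h>

#define MAXEXTLEN 0x001fffffULL	/* 21-bit extent length limit, in blocks */

/* Round align_alen up to a multiple of extsz, then pull it back under
 * MAXEXTLEN, mirroring the hunk above. */
static unsigned long long align_extent(unsigned long long align_alen,
				       unsigned long long extsz)
{
	unsigned long long temp = align_alen % extsz;

	if (temp)
		align_alen += extsz - temp;
	/* a large extsz hint can round the length past MAXEXTLEN */
	while (align_alen > MAXEXTLEN)
		align_alen -= extsz;
	assert(align_alen <= MAXEXTLEN);
	return align_alen;
}

int main(void)
{
	/* a 1 GB extent size hint on 4k blocks = 262144 blocks */
	unsigned long long extsz = 262144;

	/* requesting just under the limit rounds up past MAXEXTLEN,
	 * so the result is a shorter (but aligned) allocation */
	printf("%llu\n", align_extent(MAXEXTLEN - 1, extsz));
	return 0;
}

Running this prints 1835008: the request was shortened rather than corrupted, which is exactly the "short allocation" the outer loops are said to handle.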
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
 	 */
 	newlen = args.mp->m_ialloc_inos;
 	if (args.mp->m_maxicount &&
-	    percpu_counter_read(&args.mp->m_icount) + newlen >
+	    percpu_counter_read_positive(&args.mp->m_icount) + newlen >
 							args.mp->m_maxicount)
 		return -ENOSPC;
 	args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
 	 * If we have already hit the ceiling of inode blocks then clear
 	 * okalloc so we scan all available agi structures for a free
 	 * inode.
+	 *
+	 * Read rough value of mp->m_icount by percpu_counter_read_positive,
+	 * which will sacrifice the preciseness but improve the performance.
 	 */
 	if (mp->m_maxicount &&
-	    percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
-							mp->m_maxicount) {
+	    percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+							> mp->m_maxicount) {
 		noroom = 1;
 		okalloc = 0;
 	}
...
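percpu_counter_read() returns only the folded-in global count, so it can transiently sit below the true value, or even go negative, while per-CPU deltas are outstanding; percpu_counter_read_positive() clamps such readings to zero, which is good enough for these heuristic ceiling checks. A toy model of the difference (illustrative only, not the kernel's struct percpu_counter):

#include <stdio.h>

#define NR_CPUS 4

/* Toy approximate counter: a lagging global total plus unfolded
 * per-CPU deltas. */
struct toy_counter {
	long long global;
	long long pcpu[NR_CPUS];
};

/* Like percpu_counter_read(): cheap, ignores per-CPU deltas. */
static long long toy_read(const struct toy_counter *c)
{
	return c->global;
}

/* Like percpu_counter_read_positive(): clamp transient negatives. */
static long long toy_read_positive(const struct toy_counter *c)
{
	long long v = toy_read(c);

	return v < 0 ? 0 : v;
}

int main(void)
{
	/* true count is +7, but the folded-in total lags at -3 */
	struct toy_counter c = { .global = -3, .pcpu = { 4, 2, 0, 4 } };

	printf("read=%lld read_positive=%lld\n",
	       toy_read(&c), toy_read_positive(&c));
	return 0;
}

This prints read=-3 read_positive=0: adding m_ialloc_inos to a clamped value keeps the comparison against m_maxicount sane even when the rough read lags.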
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
 	return error;
 }

+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
 int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+	struct xfs_inode	*dp)
 {
-	xfs_trans_t *trans;
-	xfs_mount_t *mp;
-	int error;
+	struct xfs_trans	*trans;
+	struct xfs_mount	*mp;
+	int			cancel_flags = 0;
+	int			lock_mode = XFS_ILOCK_SHARED;
+	int			error = 0;

 	mp = dp->i_mount;
 	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));

-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return 0;
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+	xfs_ilock(dp, lock_mode);
+	if (!XFS_IFORK_Q(dp))
+		goto out_destroy_fork;
+	xfs_iunlock(dp, lock_mode);

 	/*
 	 * Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 * the inode in every transaction to let it float upward through
 	 * the log.
 	 */
+	lock_mode = 0;
 	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
 	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
-	if (error) {
-		xfs_trans_cancel(trans, 0);
-		return error;
-	}
-	xfs_ilock(dp, XFS_ILOCK_EXCL);
+	if (error)
+		goto out_cancel;
+
+	lock_mode = XFS_ILOCK_EXCL;
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+	xfs_ilock(dp, lock_mode);
+
+	if (!XFS_IFORK_Q(dp))
+		goto out_cancel;

 	/*
 	 * No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 */
 	xfs_trans_ijoin(trans, dp, 0);

-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = 0;
-		goto out;
+	/* invalidate and truncate the attribute fork extents */
+	if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+		error = xfs_attr3_root_inactive(&trans, dp);
+		if (error)
+			goto out_cancel;
+
+		error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+		if (error)
+			goto out_cancel;
 	}

-	error = xfs_attr3_root_inactive(&trans, dp);
-	if (error)
-		goto out;
-
-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-	if (error)
-		goto out;
+	/* Reset the attribute fork - this also destroys the in-core fork */
+	xfs_attr_fork_remove(dp, trans);

 	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+	xfs_iunlock(dp, lock_mode);
 	return error;

-out:
-	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+	xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+	/* kill the in-core attr fork before we drop the inode lock */
+	if (dp->i_afp)
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+	if (lock_mode)
+		xfs_iunlock(dp, lock_mode);
 	return error;
 }
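The restructured function above relies on a common kernel error-handling idiom: locals such as lock_mode and cancel_flags record how much state has been set up so far, so a small set of goto labels can unwind any partial progress through a single exit path. A generic sketch of the pattern with hypothetical resources (not the XFS code):

#include <stdio.h>

/* Hypothetical resources standing in for the transaction and inode lock. */
static int acquire_a(void) { return 0; }	/* 0 on success */
static int acquire_b(void) { return -1; }	/* fails, to show the unwind */
static void release_a(void) { puts("release a"); }
static void release_b(void) { puts("release b"); }

static int do_work(void)
{
	int have_a = 0, have_b = 0;
	int error;

	error = acquire_a();
	if (error)
		goto out;
	have_a = 1;

	error = acquire_b();
	if (error)
		goto out;
	have_b = 1;

	/* ... real work would go here ... */
out:
	/* one unwind path handles every partial-setup state */
	if (have_b)
		release_b();
	if (have_a)
		release_a();
	return error;
}

int main(void) { return do_work() ? 1 : 0; }

Tracking state in locals is what lets the rewritten xfs_attr_inactive() destroy the in-core fork and drop whichever lock is held from the same two labels, regardless of where the failure happened.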
@@ -124,7 +124,7 @@ xfs_iozero(
 		status = 0;
 	} while (count);

-	return (-status);
+	return status;
 }

 int
...
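The xfs_iozero hunk is a pure sign fix: the loop already stores negative errnos in status, so negating the value on return turned failures into positive numbers that callers expecting negative errnos would mishandle. A trivial illustration (the userspace harness is hypothetical):

#include <errno.h>
#include <stdio.h>

/* Mimics the old and new xfs_iozero tails: status holds 0 or a
 * negative errno collected inside the loop. */
static int zero_range_old(int status) { return -status; }
static int zero_range_new(int status) { return status; }

int main(void)
{
	int status = -EFAULT;	/* e.g. a pagecache write failed */

	/* old: returns +14, which looks like success-with-garbage to
	 * callers that test for negative errnos; new: propagates -14 */
	printf("old: %d, new: %d\n",
	       zero_range_old(status), zero_range_new(status));
	return 0;
}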
@@ -1946,21 +1946,17 @@ xfs_inactive(
 	/*
 	 * If there are attributes associated with the file then blow them away
 	 * now. The code calls a routine that recursively deconstructs the
-	 * attribute fork. We need to just commit the current transaction
-	 * because we can't use it for xfs_attr_inactive().
+	 * attribute fork. If also blows away the in-core attribute fork.
 	 */
-	if (ip->i_d.di_anextents > 0) {
-		ASSERT(ip->i_d.di_forkoff != 0);
-
+	if (XFS_IFORK_Q(ip)) {
 		error = xfs_attr_inactive(ip);
 		if (error)
 			return;
 	}

-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+	ASSERT(!ip->i_afp);
 	ASSERT(ip->i_d.di_anextents == 0);
+	ASSERT(ip->i_d.di_forkoff == 0);

 	/*
 	 * Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
 	if (error)
 		return error;

-	/* Satisfy xfs_bumplink that this is a real tmpfile */
+	/*
+	 * Prepare the tmpfile inode as if it were created through the VFS.
+	 * Otherwise, the link increment paths will complain about nlink 0->1.
+	 * Drop the link count as done by d_tmpfile(), complete the inode setup
+	 * and flag it as linkable.
+	 */
+	drop_nlink(VFS_I(tmpfile));
 	xfs_finish_inode_setup(tmpfile);
 	VFS_I(tmpfile)->i_state |= I_LINKABLE;

@@ -3151,7 +3153,7 @@ xfs_rename(
 	 * intermediate state on disk.
 	 */
 	if (wip) {
-		ASSERT(wip->i_d.di_nlink == 0);
+		ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
 		error = xfs_bumplink(tp, wip);
 		if (error)
 			goto out_trans_abort;
...
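Both hunks above deal with the whiteout inode's link count. The tmpfile is created with nlink 1; d_tmpfile() would normally drop that to 0, and the VFS only tolerates a later 0 -> 1 bump on an inode flagged I_LINKABLE. A toy model of those rules (illustrative only, not the VFS code; the warning condition is my reading of inc_nlink()):

#include <stdbool.h>
#include <stdio.h>

/* Toy inode carrying just the state this hunk manipulates. */
struct toy_inode {
	int nlink;
	bool linkable;	/* stands in for the I_LINKABLE state bit */
};

static void toy_inc_nlink(struct toy_inode *ip)
{
	if (ip->nlink == 0 && !ip->linkable)
		fprintf(stderr, "WARN: 0->1 nlink bump on non-tmpfile\n");
	ip->nlink++;
}

int main(void)
{
	struct toy_inode wip = { .nlink = 1 };	/* as created */

	wip.nlink--;		/* drop_nlink(), like d_tmpfile() */
	wip.linkable = true;	/* I_LINKABLE */
	toy_inc_nlink(&wip);	/* rename links the whiteout: 0 -> 1, quietly */
	printf("nlink = %d\n", wip.nlink);
	return 0;
}

Without the drop_nlink() call, the whiteout went to disk with nlink 2 after the bump, which is the accounting breakage the first hunk fixes.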
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	return xfs_sync_sb(mp, true);
 }

+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH	128
 int
 xfs_mod_icount(
 	struct xfs_mount	*mp,
 	int64_t			delta)
 {
-	/* deltas are +/-64, hence the large batch size of 128. */
-	__percpu_counter_add(&mp->m_icount, delta, 128);
-	if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+	__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
 		ASSERT(0);
 		percpu_counter_add(&mp->m_icount, -delta);
 		return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
 	return 0;
 }

+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH	1024
 int
 xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
 	 * Taking blocks away, need to be more accurate the closer we
 	 * are to zero.
 	 *
-	 * batch size is set to a maximum of 1024 blocks - if we are
-	 * allocating of freeing extents larger than this then we aren't
-	 * going to be hammering the counter lock so a lock per update
-	 * is not a problem.
-	 *
 	 * If the counter has a value of less than 2 * max batch size,
 	 * then make everything serialise as we are real close to
 	 * ENOSPC.
 	 */
-#define __BATCH	1024
-	if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+				     XFS_FDBLOCKS_BATCH) < 0)
 		batch = 1;
 	else
-		batch = __BATCH;
-#undef __BATCH
+		batch = XFS_FDBLOCKS_BATCH;

 	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
-	if (percpu_counter_compare(&mp->m_fdblocks,
-				   XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+	if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
 	}
...
@@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
+}

 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	return 0;
 }

+static inline int
+__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+{
+	return percpu_counter_compare(fbc, rhs);
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
 	s64	count;

 	count = percpu_counter_read(fbc);
 	/* Check to see if rough count will be sufficient for comparison */
-	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+	if (abs(count - rhs) > (batch * num_online_cpus())) {
 		if (count > rhs)
 			return 1;
 		else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);

 static int __init percpu_counter_startup(void)
 {
...
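The rough read in __percpu_counter_compare() can be off by at most batch * num_online_cpus(), since each CPU accumulates at most batch unfolded updates; only when the value under test falls inside that error band is the expensive precise sum needed. This is why XFS passes its own batch sizes above: the check stays cheap far from the limit and exact near ENOSPC. A self-contained sketch of the same strategy on a toy counter (the fixed NR_CPUS and the harness are assumptions; this is not the kernel implementation):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

/* Toy approximate counter: global total plus per-CPU deltas bounded
 * by the batch size. */
struct toy_counter {
	long long global;
	long long pcpu[NR_CPUS];
};

/* Exact but expensive: fold in every per-CPU delta. */
static long long toy_sum(const struct toy_counter *c)
{
	long long sum = c->global;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += c->pcpu[cpu];
	return sum;
}

/* Batch-aware compare, mirroring __percpu_counter_compare()'s shape. */
static int toy_compare(const struct toy_counter *c, long long rhs, int batch)
{
	long long count = c->global;	/* cheap, approximate read */

	/* If the rough count is further from rhs than the worst-case
	 * per-CPU drift, the comparison is already decided. */
	if (llabs(count - rhs) > (long long)batch * NR_CPUS)
		return count > rhs ? 1 : -1;

	/* Too close to call: pay for the precise sum. */
	count = toy_sum(c);
	if (count > rhs)
		return 1;
	return count < rhs ? -1 : 0;
}

int main(void)
{
	struct toy_counter c = { .global = 5000, .pcpu = { 900, -200, 0, 300 } };

	/* far from rhs: rough read decides; near rhs: precise sum decides */
	printf("%d %d\n", toy_compare(&c, 0, 1024), toy_compare(&c, 6000, 1024));
	return 0;
}

This prints "1 0": the first comparison never touches the per-CPU data, while the second folds it in and finds the counts exactly equal.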