/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
19
#include "xfs_fs.h"
L
Linus Torvalds 已提交
20
#include "xfs_types.h"
21
#include "xfs_bit.h"
L
Linus Torvalds 已提交
22
#include "xfs_log.h"
23
#include "xfs_inum.h"
L
Linus Torvalds 已提交
24 25
#include "xfs_trans.h"
#include "xfs_sb.h"
26
#include "xfs_ag.h"
L
Linus Torvalds 已提交
27 28 29 30
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
31
#include "xfs_alloc_btree.h"
L
Linus Torvalds 已提交
32 33
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
34
#include "xfs_attr_sf.h"
L
Linus Torvalds 已提交
35 36
#include "xfs_dinode.h"
#include "xfs_inode.h"
37
#include "xfs_inode_item.h"
L
Linus Torvalds 已提交
38
#include "xfs_bmap.h"
39
#include "xfs_btree.h"
L
Linus Torvalds 已提交
40 41 42 43 44
#include "xfs_ialloc.h"
#include "xfs_itable.h"
#include "xfs_dfrag.h"
#include "xfs_error.h"
#include "xfs_rw.h"
45
#include "xfs_vnodeops.h"
C
Christoph Hellwig 已提交
46
#include "xfs_trace.h"
L
Linus Torvalds 已提交
47 48 49 50 51 52

/*
 * Syssgi interface for swapext
 */
int
xfs_swapext(
53
	xfs_swapext_t	*sxp)
L
Linus Torvalds 已提交
54
{
C
Christoph Hellwig 已提交
55 56
	xfs_inode_t     *ip, *tip;
	struct file	*file, *target_file;
L
Linus Torvalds 已提交
57 58 59
	int		error = 0;

	/* Pull information for the target fd */
C
Christoph Hellwig 已提交
60 61
	file = fget((int)sxp->sx_fdtarget);
	if (!file) {
L
Linus Torvalds 已提交
62
		error = XFS_ERROR(EINVAL);
63
		goto out;
L
Linus Torvalds 已提交
64 65
	}

66 67 68 69 70
	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
		error = XFS_ERROR(EBADF);
		goto out_put_file;
	}

C
Christoph Hellwig 已提交
71 72
	target_file = fget((int)sxp->sx_fdtmp);
	if (!target_file) {
L
Linus Torvalds 已提交
73
		error = XFS_ERROR(EINVAL);
C
Christoph Hellwig 已提交
74
		goto out_put_file;
L
Linus Torvalds 已提交
75 76
	}

77 78 79 80 81 82
	if (!(target_file->f_mode & FMODE_WRITE) ||
	    (target_file->f_flags & O_APPEND)) {
		error = XFS_ERROR(EBADF);
		goto out_put_target_file;
	}

83 84 85 86 87 88
	if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
	    IS_SWAPFILE(target_file->f_path.dentry->d_inode)) {
		error = XFS_ERROR(EINVAL);
		goto out_put_target_file;
	}

C
Christoph Hellwig 已提交
89 90
	ip = XFS_I(file->f_path.dentry->d_inode);
	tip = XFS_I(target_file->f_path.dentry->d_inode);
L
Linus Torvalds 已提交
91 92

	if (ip->i_mount != tip->i_mount) {
C
Christoph Hellwig 已提交
93 94
		error = XFS_ERROR(EINVAL);
		goto out_put_target_file;
L
Linus Torvalds 已提交
95 96 97
	}

	if (ip->i_ino == tip->i_ino) {
C
Christoph Hellwig 已提交
98 99
		error = XFS_ERROR(EINVAL);
		goto out_put_target_file;
L
Linus Torvalds 已提交
100 101
	}

C
Christoph Hellwig 已提交
102 103 104
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = XFS_ERROR(EIO);
		goto out_put_target_file;
L
Linus Torvalds 已提交
105 106
	}

107
	error = xfs_swap_extents(ip, tip, sxp);
108

C
Christoph Hellwig 已提交
109 110 111 112 113
 out_put_target_file:
	fput(target_file);
 out_put_file:
	fput(file);
 out:
114 115 116
	return error;
}

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
/*
 * Before exchanging data forks, make sure the fork format held by each
 * inode is still valid once it lands in the other inode.
 *
 * With attr1 the fork offset is fixed, so this cannot go wrong. With attr2
 * the data fork size depends on how much space the attribute fork occupies,
 * so a format that is legal in one inode may be illegal in the other.
 *
 * E.g. the target has room for 7 extents in extent format while the temp
 * inode only has room for 6.  If we defragment down to 7 extents, the temp
 * inode holds them in btree format, but after the swap the target would
 * need them in extent format. Hence data forks cannot be blindly swapped
 * on attr2 filesystems.
 *
 * The check is done in both directions so that the temporary inode does
 * not end up corrupt either.
 *
 * Fixing the way xfs_fsr sets up the attribute fork in the source inode
 * prevents this situation from arising, so all that is done here is to
 * reject the attempt; the responsibility for getting this right is placed
 * on userspace.
 *
 * Returns 0 if the swap is acceptable, EINVAL otherwise.
 */
static int
xfs_swap_extents_check_format(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip)	/* tmp inode */
{
	const int	target_fmt = ip->i_d.di_format;
	const int	temp_fmt = tip->i_d.di_format;

	/* Neither data fork should ever be in local format. */
	if (target_fmt == XFS_DINODE_FMT_LOCAL ||
	    temp_fmt == XFS_DINODE_FMT_LOCAL)
		return EINVAL;

	/*
	 * If the temporary inode has more extents than the target inode,
	 * why did userspace call us?
	 */
	if (tip->i_d.di_nextents > ip->i_d.di_nextents)
		return EINVAL;

	/*
	 * A btree-format temp inode paired with an extent-format target
	 * would leave the target in the wrong format, since we already know
	 * the temp inode does not have more extents than the target.
	 */
	if (temp_fmt == XFS_DINODE_FMT_BTREE &&
	    target_fmt == XFS_DINODE_FMT_EXTENTS)
		return EINVAL;

	/* What the temp inode holds must fit into the target inode. */
	switch (temp_fmt) {
	case XFS_DINODE_FMT_EXTENTS:
		/* extent count against the target's inline maximum */
		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
		    ip->i_df.if_ext_max)
			return EINVAL;
		break;
	case XFS_DINODE_FMT_BTREE:
		/* btree root size against the target's fork offset */
		if (XFS_IFORK_BOFF(ip) &&
		    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
			return EINVAL;
		break;
	default:
		break;
	}

	/* What the target inode holds must fit into the temp inode. */
	switch (target_fmt) {
	case XFS_DINODE_FMT_EXTENTS:
		/* extent count against the temp inode's inline maximum */
		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
		    tip->i_df.if_ext_max)
			return EINVAL;
		break;
	case XFS_DINODE_FMT_BTREE:
		/* btree root size against the temp inode's fork offset */
		if (XFS_IFORK_BOFF(tip) &&
		    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
			return EINVAL;
		break;
	default:
		break;
	}

	return 0;
}

189 190
int
xfs_swap_extents(
191 192
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip,	/* tmp inode */
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
	xfs_swapext_t	*sxp)
{
	xfs_mount_t	*mp;
	xfs_trans_t	*tp;
	xfs_bstat_t	*sbp = &sxp->sx_stat;
	xfs_ifork_t	*tempifp, *ifp, *tifp;
	int		ilf_fields, tilf_fields;
	int		error = 0;
	int		aforkblks = 0;
	int		taforkblks = 0;
	__uint64_t	tmp;

	mp = ip->i_mount;

	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
	if (!tempifp) {
		error = XFS_ERROR(ENOMEM);
210
		goto out;
211 212 213
	}

	sbp = &sxp->sx_stat;
L
Linus Torvalds 已提交
214

215 216 217 218 219 220 221 222
	/*
	 * we have to do two separate lock calls here to keep lockdep
	 * happy. If we try to get all the locks in one call, lock will
	 * report false positives when we drop the ILOCK and regain them
	 * below.
	 */
	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
L
Linus Torvalds 已提交
223 224 225 226

	/* Verify that both files have the same format */
	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
		error = XFS_ERROR(EINVAL);
227
		goto out_unlock;
L
Linus Torvalds 已提交
228 229 230
	}

	/* Verify both files are either real-time or non-realtime */
231
	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
L
Linus Torvalds 已提交
232
		error = XFS_ERROR(EINVAL);
233
		goto out_unlock;
L
Linus Torvalds 已提交
234 235
	}

236
	if (VN_CACHED(VFS_I(tip)) != 0) {
237 238
		error = xfs_flushinval_pages(tip, 0, -1,
				FI_REMAPF_LOCKED);
239
		if (error)
240
			goto out_unlock;
241
	}
L
Linus Torvalds 已提交
242 243

	/* Verify O_DIRECT for ftmp */
244
	if (VN_CACHED(VFS_I(tip)) != 0) {
L
Linus Torvalds 已提交
245
		error = XFS_ERROR(EINVAL);
246
		goto out_unlock;
L
Linus Torvalds 已提交
247 248 249
	}

	/* Verify all data are being swapped */
250 251 252
	if (sxp->sx_offset != 0 ||
	    sxp->sx_length != ip->i_d.di_size ||
	    sxp->sx_length != tip->i_d.di_size) {
L
Linus Torvalds 已提交
253
		error = XFS_ERROR(EFAULT);
254
		goto out_unlock;
L
Linus Torvalds 已提交
255 256
	}

257 258 259 260 261 262
	/* check inode formats now that data is flushed */
	error = xfs_swap_extents_check_format(ip, tip);
	if (error) {
		xfs_fs_cmn_err(CE_NOTE, mp,
		    "%s: inode 0x%llx format is incompatible for exchanging.",
				__FILE__, ip->i_ino);
263
		goto out_unlock;
L
Linus Torvalds 已提交
264 265 266 267 268 269 270 271 272
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
273 274 275 276
	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
L
Linus Torvalds 已提交
277
		error = XFS_ERROR(EBUSY);
278
		goto out_unlock;
L
Linus Torvalds 已提交
279 280 281 282 283
	}

	/* We need to fail if the file is memory mapped.  Once we have tossed
	 * all existing pages, the page fault will have no option
	 * but to go to the filesystem for pages. By making the page fault call
284
	 * vop_read (or write in the case of autogrow) they block on the iolock
L
Linus Torvalds 已提交
285 286
	 * until we have switched the extents.
	 */
287
	if (VN_MAPPED(VFS_I(ip))) {
L
Linus Torvalds 已提交
288
		error = XFS_ERROR(EBUSY);
289
		goto out_unlock;
L
Linus Torvalds 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL);

	/*
	 * There is a race condition here since we gave up the
	 * ilock.  However, the data fork will not change since
	 * we have the iolock (locked for truncation too) so we
	 * are safe.  We don't really care if non-io related
	 * fields change.
	 */

303
	xfs_tosspages(ip, 0, -1, FI_REMAPF);
L
Linus Torvalds 已提交
304 305 306 307 308 309 310 311

	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
	if ((error = xfs_trans_reserve(tp, 0,
				     XFS_ICHANGE_LOG_RES(mp), 0,
				     0, 0))) {
		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
		xfs_trans_cancel(tp, 0);
312
		goto out;
L
Linus Torvalds 已提交
313
	}
314
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
L
Linus Torvalds 已提交
315 316 317 318 319 320 321

	/*
	 * Count the number of extended attribute blocks
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
322 323
		if (error)
			goto out_trans_cancel;
L
Linus Torvalds 已提交
324 325 326 327 328
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			&taforkblks);
329 330
		if (error)
			goto out_trans_cancel;
L
Linus Torvalds 已提交
331 332 333 334 335 336 337
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
338 339 340
	*tempifp = *ifp;	/* struct copy */
	*ifp = *tifp;		/* struct copy */
	*tifp = *tempifp;	/* struct copy */
L
Linus Torvalds 已提交
341

342 343 344 345 346 347 348 349 350 351
	/*
	 * Fix the in-memory data fork values that are dependent on the fork
	 * offset in the inode. We can't assume they remain the same as attr2
	 * has dynamic fork offsets.
	 */
	ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
					(uint)sizeof(xfs_bmbt_rec_t);
	tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
					(uint)sizeof(xfs_bmbt_rec_t);

L
Linus Torvalds 已提交
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
	/*
	 * Fix the on-disk inode values
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	ilf_fields = XFS_ILOG_CORE;

	switch(ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		ilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ilf_fields |= XFS_ILOG_DBROOT;
		break;
	}

	tilf_fields = XFS_ILOG_CORE;

	switch(tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		tilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		tilf_fields |= XFS_ILOG_DBROOT;
		break;
	}


406
	IHOLD(ip);
407
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
408 409

	IHOLD(tip);
410
	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
L
Linus Torvalds 已提交
411 412 413 414 415 416 417 418

	xfs_trans_log_inode(tp, ip,  ilf_fields);
	xfs_trans_log_inode(tp, tip, tilf_fields);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
419
	if (mp->m_flags & XFS_MOUNT_WSYNC)
L
Linus Torvalds 已提交
420 421
		xfs_trans_set_sync(tp);

422
	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
L
Linus Torvalds 已提交
423

424 425
out:
	kmem_free(tempifp);
L
Linus Torvalds 已提交
426
	return error;
427

428 429 430 431 432
out_unlock:
	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	goto out;

433 434 435
out_trans_cancel:
	xfs_trans_cancel(tp, 0);
	goto out_unlock;
L
Linus Torvalds 已提交
436
}