// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
10
#include <linux/fsnotify.h>
C
Christoph Hellwig 已提交
11 12 13 14 15
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
16 17
#include <linux/mount.h>
#include <linux/namei.h>
C
Christoph Hellwig 已提交
18 19
#include <linux/writeback.h>
#include <linux/compat.h>
20
#include <linux/security.h>
C
Christoph Hellwig 已提交
21
#include <linux/xattr.h>
22
#include <linux/mm.h>
23
#include <linux/slab.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
26
#include <linux/btrfs.h>
M
Mark Fasheh 已提交
27
#include <linux/uaccess.h>
28
#include <linux/iversion.h>
C
Christoph Hellwig 已提交
29 30
#include "ctree.h"
#include "disk-io.h"
31
#include "export.h"
C
Christoph Hellwig 已提交
32 33 34 35
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
36
#include "locking.h"
37
#include "inode-map.h"
38
#include "backref.h"
39
#include "rcu-string.h"
40
#include "send.h"
41
#include "dev-replace.h"
42
#include "props.h"
43
#include "sysfs.h"
J
Josef Bacik 已提交
44
#include "qgroup.h"
45
#include "tree-log.h"
46
#include "compression.h"
47
#include "space-info.h"
48
#include "delalloc-space.h"
49
#include "block-group.h"
C
Christoph Hellwig 已提交
50

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
#ifdef CONFIG_64BIT
/*
 * If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */

/* Packed timespec: a 64-bit seconds field directly followed by 32-bit nsec. */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/*
 * Packed variant of the received-subvol ioctl argument, built on the packed
 * timespec above. The in/out annotations on each member give the direction
 * of the data relative to the kernel.
 */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

/* ioctl number 37 of BTRFS_IOCTL_MAGIC, sized for the packed layout above. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif

76 77 78 79 80 81 82 83 84 85 86 87 88
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/*
 * Packed variant of the send ioctl argument in which the clone_sources
 * pointer is carried as a compat_uptr_t instead of a native pointer.
 * Only built when both CONFIG_64BIT and CONFIG_COMPAT are set.
 */
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

/* ioctl number 38 of BTRFS_IOCTL_MAGIC, sized for the packed layout above. */
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif
89

90
/* Mask out flags that are inappropriate for the given type of inode. */
91 92
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
93
{
94
	if (S_ISDIR(inode->i_mode))
95
		return flags;
96
	else if (S_ISREG(inode->i_mode))
97 98 99 100 101 102
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

/*
103 104
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
105
 */
106
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
L
Li Zefan 已提交
122 123 124
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

125
	if (flags & BTRFS_INODE_NOCOMPRESS)
L
Li Zefan 已提交
126
		iflags |= FS_NOCOMP_FL;
127 128
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;
129 130 131 132 133 134 135

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
136
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
137
{
138
	struct btrfs_inode *binode = BTRFS_I(inode);
139
	unsigned int new_fl = 0;
140

141
	if (binode->flags & BTRFS_INODE_SYNC)
142
		new_fl |= S_SYNC;
143
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
144
		new_fl |= S_IMMUTABLE;
145
	if (binode->flags & BTRFS_INODE_APPEND)
146
		new_fl |= S_APPEND;
147
	if (binode->flags & BTRFS_INODE_NOATIME)
148
		new_fl |= S_NOATIME;
149
	if (binode->flags & BTRFS_INODE_DIRSYNC)
150 151 152 153 154
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
155 156 157 158
}

/*
 * Copy the inode's flags, converted to the FS_*_FL representation, to the
 * user buffer @arg.
 *
 * Returns 0 on success or -EFAULT if the copy to userspace fails.
 */
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	unsigned int fsflags;

	fsflags = btrfs_inode_flags_to_fsflags(BTRFS_I(inode)->flags);

	return copy_to_user(arg, &fsflags, sizeof(fsflags)) ? -EFAULT : 0;
}

167 168 169 170 171
/*
 * Check if @flags are a supported and valid set of FS_*_FL flags and that
 * the old and new flags are not conflicting
 */
static int check_fsflags(unsigned int old_flags, unsigned int flags)
172 173 174 175
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
		      FS_NOATIME_FL | FS_NODUMP_FL | \
		      FS_SYNC_FL | FS_DIRSYNC_FL | \
L
Li Zefan 已提交
176 177
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
178 179
		return -EOPNOTSUPP;

180
	/* COMPR and NOCOMP on new/old are valid */
181 182 183
	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

184 185 186 187 188 189 190 191 192
	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
		return -EINVAL;

	/* NOCOW and compression options are mutually exclusive */
	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;
	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;

193 194 195
	return 0;
}

196 197
/*
 * Apply a set of FS_*_FL flags read from the user buffer @arg to the inode
 * behind @file.
 *
 * The new btrfs flag word is built in a local copy and only stored in the
 * inode once the compression property has been updated inside the
 * transaction, so an early failure leaves the in-memory flags untouched.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *bi = BTRFS_I(inode);
	struct btrfs_root *root = bi->root;
	struct btrfs_trans_handle *trans;
	const char *comp_name = NULL;
	unsigned int new_fl, cur_fl;
	u32 iflags;
	int ret;

	if (!inode_owner_or_capable(inode))
		return -EPERM;
	if (btrfs_root_readonly(root))
		return -EROFS;
	if (copy_from_user(&new_fl, arg, sizeof(new_fl)))
		return -EFAULT;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);
	new_fl = btrfs_mask_fsflags_for_type(inode, new_fl);
	cur_fl = btrfs_inode_flags_to_fsflags(bi->flags);

	ret = vfs_ioc_setflags_prepare(inode, cur_fl, new_fl);
	if (!ret)
		ret = check_fsflags(cur_fl, new_fl);
	if (ret)
		goto out_unlock;

	/* Start from the current flags with all simple on/off bits cleared */
	iflags = bi->flags & ~(BTRFS_INODE_SYNC | BTRFS_INODE_IMMUTABLE |
			       BTRFS_INODE_APPEND | BTRFS_INODE_NODUMP |
			       BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC);
	if (new_fl & FS_SYNC_FL)
		iflags |= BTRFS_INODE_SYNC;
	if (new_fl & FS_IMMUTABLE_FL)
		iflags |= BTRFS_INODE_IMMUTABLE;
	if (new_fl & FS_APPEND_FL)
		iflags |= BTRFS_INODE_APPEND;
	if (new_fl & FS_NODUMP_FL)
		iflags |= BTRFS_INODE_NODUMP;
	if (new_fl & FS_NOATIME_FL)
		iflags |= BTRFS_INODE_NOATIME;
	if (new_fl & FS_DIRSYNC_FL)
		iflags |= BTRFS_INODE_DIRSYNC;

	if (!S_ISREG(inode->i_mode)) {
		if (new_fl & FS_NOCOW_FL)
			iflags |= BTRFS_INODE_NODATACOW;
		else
			iflags &= ~BTRFS_INODE_NODATACOW;
	} else if (inode->i_size == 0) {
		/*
		 * It's safe to toggle csums here, no extents exist. For a
		 * non-empty regular file we want the flag to reflect the real
		 * COW status of the file and leave it alone.
		 */
		if (new_fl & FS_NOCOW_FL)
			iflags |= BTRFS_INODE_NODATACOW |
				  BTRFS_INODE_NODATASUM;
		else
			iflags &= ~(BTRFS_INODE_NODATACOW |
				    BTRFS_INODE_NODATASUM);
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (new_fl & FS_NOCOMP_FL) {
		iflags &= ~BTRFS_INODE_COMPRESS;
		iflags |= BTRFS_INODE_NOCOMPRESS;
	} else if (new_fl & FS_COMPR_FL) {
		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
			goto out_unlock;
		}

		iflags |= BTRFS_INODE_COMPRESS;
		iflags &= ~BTRFS_INODE_NOCOMPRESS;

		/* Fall back to zlib when the fs has no usable default name */
		comp_name = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp_name || !*comp_name)
			comp_name = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
	} else {
		iflags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	/*
	 * 1 for inode item
	 * 2 for properties
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	if (comp_name)
		ret = btrfs_set_prop(trans, inode, "btrfs.compression",
				     comp_name, strlen(comp_name), 0);
	else
		ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
				     0, 0);
	/* -ENODATA is tolerated only when the property is being cleared */
	if (ret && (comp_name || ret != -ENODATA)) {
		btrfs_abort_transaction(trans, ret);
		goto out_end_trans;
	}

	bi->flags = iflags;
	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

out_end_trans:
	btrfs_end_transaction(trans);
out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
/*
 * Translate btrfs internal inode flags to xflags as expected by the
 * FS_IOC_FSGETXATT ioctl. Filter only the supported ones, unknown flags are
 * silently dropped.
 */
static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
{
	unsigned int out;

	out  = (flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0;
	out |= (flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0;
	out |= (flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0;
	out |= (flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0;
	out |= (flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0;

	return out;
}

/*
 * Check if @flags are a supported and valid set of FS_XFLAG_* flags.
 * Returns 0 if so, -EOPNOTSUPP if any other bit is set.
 */
static int check_xflags(unsigned int flags)
{
	const unsigned int supported = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
				       FS_XFLAG_NOATIME | FS_XFLAG_NODUMP |
				       FS_XFLAG_SYNC;

	return (flags & ~supported) ? -EOPNOTSUPP : 0;
}

381 382 383 384 385 386 387 388 389
/*
 * Set the xflags from the internal inode flags. The remaining items of fsxattr
 * are zeroed.
 *
 * Returns 0 on success or -EFAULT if the copy to userspace fails.
 */
static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
{
	struct fsxattr fa;
	unsigned int xflags;

	xflags = btrfs_inode_flags_to_xflags(BTRFS_I(file_inode(file))->flags);
	simple_fill_fsxattr(&fa, xflags);

	return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0;
}

397 398 399 400 401 402
/*
 * Apply the xflags of a struct fsxattr read from the user buffer @arg to the
 * inode behind @file. Non-zero extsize, projid or cowextsize values are
 * rejected with -EOPNOTSUPP.
 *
 * The btrfs flags are modified in place; if anything fails afterwards both
 * the btrfs flags and inode->i_flags are rolled back to their saved values.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_inode *bi = BTRFS_I(inode);
	struct btrfs_root *root = bi->root;
	struct btrfs_trans_handle *trans;
	struct fsxattr fa, cur_fa;
	unsigned int saved_flags;
	unsigned int saved_i_flags;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;
	if (btrfs_root_readonly(root))
		return -EROFS;
	if (copy_from_user(&fa, arg, sizeof(fa)))
		return -EFAULT;

	ret = check_xflags(fa.fsx_xflags);
	if (ret)
		return ret;

	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	/* Remember the current state so it can be restored on failure */
	saved_flags = bi->flags;
	saved_i_flags = inode->i_flags;

	simple_fill_fsxattr(&cur_fa, btrfs_inode_flags_to_xflags(bi->flags));
	ret = vfs_ioc_fssetxattr_check(inode, &cur_fa, &fa);
	if (ret)
		goto out_unlock;

	if (fa.fsx_xflags & FS_XFLAG_SYNC)
		bi->flags |= BTRFS_INODE_SYNC;
	else
		bi->flags &= ~BTRFS_INODE_SYNC;

	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
		bi->flags |= BTRFS_INODE_IMMUTABLE;
	else
		bi->flags &= ~BTRFS_INODE_IMMUTABLE;

	if (fa.fsx_xflags & FS_XFLAG_APPEND)
		bi->flags |= BTRFS_INODE_APPEND;
	else
		bi->flags &= ~BTRFS_INODE_APPEND;

	if (fa.fsx_xflags & FS_XFLAG_NODUMP)
		bi->flags |= BTRFS_INODE_NODUMP;
	else
		bi->flags &= ~BTRFS_INODE_NODUMP;

	if (fa.fsx_xflags & FS_XFLAG_NOATIME)
		bi->flags |= BTRFS_INODE_NOATIME;
	else
		bi->flags &= ~BTRFS_INODE_NOATIME;

	/* 1 item for the inode */
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);

out_unlock:
	if (ret) {
		bi->flags = saved_flags;
		inode->i_flags = saved_i_flags;
	}

	inode_unlock(inode);
	mnt_drop_write_file(file);

	return ret;
}

486 487
/* Store the inode's i_generation in the user integer @arg via put_user(). */
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	return put_user(file_inode(file)->i_generation, arg);
}
C
Christoph Hellwig 已提交
492

493 494
/*
 * Trim the filesystem using the struct fstrim_range read from the user
 * buffer @arg, and copy the range back to @arg afterwards.
 *
 * Requires CAP_SYS_ADMIN. The requested minlen is raised to the smallest
 * discard granularity found among the devices that support discard.
 *
 * Returns 0 on success or a negative errno (-EOPNOTSUPP if no device
 * supports discard).
 */
static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
					void __user *arg)
{
	struct btrfs_device *dev;
	struct fstrim_range range;
	u64 min_granularity = ULLONG_MAX;
	u64 nr_discard_devs = 0;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * If the fs is mounted with nologreplay, which requires it to be
	 * mounted in RO mode as well, we can not allow discard on free space
	 * inside block groups, because log trees refer to extents that are not
	 * pinned in a block group's free space cache (pinning the extents is
	 * precisely the first phase of replaying a log tree).
	 */
	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
		return -EROFS;

	rcu_read_lock();
	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices,
				dev_list) {
		struct request_queue *q;

		if (!dev->bdev)
			continue;

		q = bdev_get_queue(dev->bdev);
		if (!blk_queue_discard(q))
			continue;

		nr_discard_devs++;
		min_granularity = min_t(u64, q->limits.discard_granularity,
					min_granularity);
	}
	rcu_read_unlock();

	if (!nr_discard_devs)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;

	/*
	 * NOTE: Don't truncate the range using super->total_bytes.  Bytenr of
	 * block group is in the logical address space, which can be any
	 * sectorsize aligned bytenr in the range [0, U64_MAX].
	 */
	if (range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	range.minlen = max(range.minlen, min_granularity);
	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	return copy_to_user(arg, &range, sizeof(range)) ? -EFAULT : 0;
}

554
/*
 * Return 1 if all BTRFS_UUID_SIZE bytes of @uuid are zero, 0 otherwise.
 */
int __pure btrfs_is_empty_uuid(u8 *uuid)
{
	const u8 *cur = uuid;
	const u8 *end = uuid + BTRFS_UUID_SIZE;

	while (cur < end) {
		if (*cur++)
			return 0;
	}
	return 1;
}

565
static noinline int create_subvol(struct inode *dir,
566
				  struct dentry *dentry,
567
				  const char *name, int namelen,
568
				  struct btrfs_qgroup_inherit *inherit)
C
Christoph Hellwig 已提交
569
{
570
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
C
Christoph Hellwig 已提交
571 572
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
573
	struct btrfs_root_item *root_item;
C
Christoph Hellwig 已提交
574 575
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
576
	struct btrfs_root *root = BTRFS_I(dir)->root;
577
	struct btrfs_root *new_root;
578
	struct btrfs_block_rsv block_rsv;
579
	struct timespec64 cur_time = current_time(dir);
580
	struct inode *inode;
C
Christoph Hellwig 已提交
581 582
	int ret;
	int err;
583
	dev_t anon_dev = 0;
C
Christoph Hellwig 已提交
584 585
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
586
	u64 index = 0;
C
Christoph Hellwig 已提交
587

588 589 590 591
	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

592
	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
593
	if (ret)
594
		goto fail_free;
595

596 597 598 599
	ret = get_anon_bdev(&anon_dev);
	if (ret < 0)
		goto fail_free;

600 601
	/*
	 * Don't create subvolume whose level is not zero. Or qgroup will be
602
	 * screwed up since it assumes subvolume qgroup's level to be 0.
603
	 */
604 605 606 607
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}
608

609
	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
J
Josef Bacik 已提交
610
	/*
611 612
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
J
Josef Bacik 已提交
613
	 */
614
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
615
	if (ret)
616
		goto fail_free;
617 618 619 620

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
621
		btrfs_subvolume_release_metadata(fs_info, &block_rsv);
622
		goto fail_free;
623 624 625
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;
C
Christoph Hellwig 已提交
626

627
	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
A
Arne Jansen 已提交
628 629 630
	if (ret)
		goto fail;

631
	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
632 633 634 635
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}
C
Christoph Hellwig 已提交
636 637 638

	btrfs_mark_buffer_dirty(leaf);

639
	inode_item = &root_item->inode;
640 641 642
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
643
	btrfs_set_stack_inode_nbytes(inode_item,
644
				     fs_info->nodesize);
645
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
C
Christoph Hellwig 已提交
646

647 648
	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
649
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
650

651 652 653 654 655 656
	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);
C
Christoph Hellwig 已提交
657

658 659
	btrfs_set_root_generation_v2(root_item,
			btrfs_root_generation(root_item));
660
	generate_random_guid(root_item->uuid);
661 662 663 664 665
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);
C
Christoph Hellwig 已提交
666

667
	btrfs_tree_unlock(leaf);
C
Christoph Hellwig 已提交
668 669 670
	free_extent_buffer(leaf);
	leaf = NULL;

671
	btrfs_set_root_dirid(root_item, new_dirid);
C
Christoph Hellwig 已提交
672 673

	key.objectid = objectid;
674
	key.offset = 0;
675
	key.type = BTRFS_ROOT_ITEM_KEY;
676
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
677
				root_item);
C
Christoph Hellwig 已提交
678 679 680
	if (ret)
		goto fail;

681
	key.offset = (u64)-1;
682
	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
683
	if (IS_ERR(new_root)) {
684
		free_anon_bdev(anon_dev);
685
		ret = PTR_ERR(new_root);
686
		btrfs_abort_transaction(trans, ret);
687 688
		goto fail;
	}
689 690
	/* Freeing will be done in btrfs_put_root() of new_root */
	anon_dev = 0;
691 692 693

	btrfs_record_root_in_trans(trans, new_root);

694
	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
695
	btrfs_put_root(new_root);
696 697
	if (ret) {
		/* We potentially lose an unused inode item here */
698
		btrfs_abort_transaction(trans, ret);
699 700 701
		goto fail;
	}

702 703 704 705
	mutex_lock(&new_root->objectid_mutex);
	new_root->highest_objectid = new_dirid;
	mutex_unlock(&new_root->objectid_mutex);

C
Christoph Hellwig 已提交
706 707 708
	/*
	 * insert the directory item
	 */
709
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
710
	if (ret) {
711
		btrfs_abort_transaction(trans, ret);
712 713
		goto fail;
	}
714

715
	ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
716
				    BTRFS_FT_DIR, index);
717
	if (ret) {
718
		btrfs_abort_transaction(trans, ret);
C
Christoph Hellwig 已提交
719
		goto fail;
720
	}
721

722
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
723
	ret = btrfs_update_inode(trans, root, dir);
724 725 726 727
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}
728

729
	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
730
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
731 732 733 734
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}
C
Christoph Hellwig 已提交
735

736
	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
737
				  BTRFS_UUID_KEY_SUBVOL, objectid);
738
	if (ret)
739
		btrfs_abort_transaction(trans, ret);
740

C
Christoph Hellwig 已提交
741
fail:
742
	kfree(root_item);
743 744
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
745
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
746

747
	err = btrfs_commit_transaction(trans);
C
Christoph Hellwig 已提交
748 749
	if (err && !ret)
		ret = err;
750

751 752
	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
753 754
		if (IS_ERR(inode))
			return PTR_ERR(inode);
755 756
		d_instantiate(dentry, inode);
	}
C
Christoph Hellwig 已提交
757
	return ret;
758 759

fail_free:
760 761
	if (anon_dev)
		free_anon_bdev(anon_dev);
762 763
	kfree(root_item);
	return ret;
C
Christoph Hellwig 已提交
764 765
}

766
/*
 * Create a snapshot of @root as @dentry inside directory @dir.
 *
 * A pending snapshot is queued on the running transaction and the
 * transaction is then committed; the result is read back from the pending
 * snapshot's error and snap fields afterwards.
 *
 * Returns 0 on success or a negative errno.
 */
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
			   struct dentry *dentry, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_pending_snapshot *pending;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	int ret;

	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		return -EINVAL;

	if (atomic_read(&root->nr_swapfiles)) {
		btrfs_warn(fs_info,
			   "cannot snapshot subvolume with active swapfile");
		return -ETXTBSY;
	}

	pending = kzalloc(sizeof(*pending), GFP_KERNEL);
	if (!pending)
		return -ENOMEM;

	ret = get_anon_bdev(&pending->anon_dev);
	if (ret < 0)
		goto free_pending;

	/* Allocate both, then check both: the cleanup copes with NULL */
	pending->root_item = kzalloc(sizeof(*pending->root_item), GFP_KERNEL);
	pending->path = btrfs_alloc_path();
	if (!pending->root_item || !pending->path) {
		ret = -ENOMEM;
		goto free_pending;
	}

	btrfs_init_block_rsv(&pending->block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
	 * 1 - UUID item
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
					       &pending->block_rsv, 8, false);
	if (ret)
		goto free_pending;

	pending->dentry = dentry;
	pending->root = root;
	pending->readonly = readonly;
	pending->dir = dir;
	pending->inherit = inherit;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

	spin_lock(&fs_info->trans_lock);
	list_add(&pending->list, &trans->transaction->pending_snapshots);
	spin_unlock(&fs_info->trans_lock);

	ret = btrfs_commit_transaction(trans);
	if (ret)
		goto fail;

	ret = pending->error;
	if (ret)
		goto fail;

	ret = btrfs_orphan_cleanup(pending->snap);
	if (ret)
		goto fail;

	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}

	d_instantiate(dentry, inode);
	ret = 0;
	/* Success: keep the cleanup below from releasing anon_dev */
	pending->anon_dev = 0;
fail:
	/* Prevent double freeing of anon_dev */
	if (ret && pending->snap)
		pending->snap->anon_dev = 0;
	btrfs_put_root(pending->snap);
	btrfs_subvolume_release_metadata(fs_info, &pending->block_rsv);
free_pending:
	if (pending->anon_dev)
		free_anon_bdev(pending->anon_dev);
	kfree(pending->root_item);
	btrfs_free_path(pending->path);
	kfree(pending);

	return ret;
}

870 871 872 873 874 875 876 877 878 879 880
/*  copy of may_delete in fs/namei.c()
 *	Check whether we can remove a link victim from directory dir, check
 *  whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
881
 *  6. If the victim is append-only or immutable we can't do anything with
882 883 884 885 886 887 888 889
 *     links pointing to it.
 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 *  9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

890
static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
891 892 893
{
	int error;

894
	if (d_really_is_negative(victim))
895 896
		return -ENOENT;

897
	BUG_ON(d_inode(victim->d_parent) != dir);
898
	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
899 900 901 902 903 904

	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
905 906
	if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
	    IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
907 908
		return -EPERM;
	if (isdir) {
909
		if (!d_is_dir(victim))
910 911 912
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
913
	} else if (d_is_dir(victim))
914 915 916 917 918 919 920 921
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

922 923 924
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
925
	if (d_really_is_positive(child))
926 927 928 929 930 931 932 933 934 935 936
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent.  This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
A
Al Viro 已提交
937
static noinline int btrfs_mksubvol(const struct path *parent,
938
				   const char *name, int namelen,
S
Sage Weil 已提交
939
				   struct btrfs_root *snap_src,
940
				   bool readonly,
941
				   struct btrfs_qgroup_inherit *inherit)
942
{
943 944
	struct inode *dir = d_inode(parent->dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
945 946 947
	struct dentry *dentry;
	int error;

948 949 950
	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (error == -EINTR)
		return error;
951 952 953 954 955 956

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

957
	error = btrfs_may_create(dir, dentry);
958
	if (error)
959
		goto out_dput;
960

C
Chris Mason 已提交
961 962 963 964 965 966 967 968 969 970
	/*
	 * even if this name doesn't exist, we may get hash collisions.
	 * check for them now when we can safely fail
	 */
	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
					       dir->i_ino, name,
					       namelen);
	if (error)
		goto out_dput;

971
	down_read(&fs_info->subvol_sem);
972 973 974 975

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

976 977 978 979 980
	if (snap_src)
		error = create_snapshot(snap_src, dir, dentry, readonly, inherit);
	else
		error = create_subvol(dir, dentry, name, namelen, inherit);

981 982 983
	if (!error)
		fsnotify_mkdir(dir, dentry);
out_up_read:
984
	up_read(&fs_info->subvol_sem);
985 986 987
out_dput:
	dput(dentry);
out_unlock:
A
Al Viro 已提交
988
	inode_unlock(dir);
989 990 991
	return error;
}

992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
/*
 * Snapshot @root as @name below @parent.
 *
 * Wraps btrfs_mksubvol() with the work needed before a snapshot: delalloc is
 * started, new writes are forced to COW for the duration, and outstanding
 * ordered extents are waited for. All of it runs under the read side of
 * root->snapshot_lock.
 */
static noinline int btrfs_mksnapshot(const struct path *parent,
				   const char *name, int namelen,
				   struct btrfs_root *root,
				   bool readonly,
				   struct btrfs_qgroup_inherit *inherit)
{
	int ret;

	/*
	 * Force new buffered writes to reserve space even when NOCOW is
	 * possible. This is to avoid later writeback (running dealloc) to
	 * fallback to COW mode and unexpectedly fail with ENOSPC.
	 */
	btrfs_drew_read_lock(&root->snapshot_lock);

	ret = btrfs_start_delalloc_snapshot(root);
	if (ret)
		goto out_unlock;

	/*
	 * All previous writes have started writeback in NOCOW mode, so now
	 * we force future writes to fallback to COW mode during snapshot
	 * creation.
	 */
	atomic_inc(&root->snapshot_force_cow);

	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

	ret = btrfs_mksubvol(parent, name, namelen,
			     root, readonly, inherit);

	atomic_dec(&root->snapshot_force_cow);
out_unlock:
	btrfs_drew_read_unlock(&root->snapshot_lock);
	return ret;
}

C
Chris Mason 已提交
1031 1032 1033 1034 1035 1036 1037
/*
 * When we're defragging a range, we don't want to kick it off again
 * if it is really just waiting for delalloc to send it down.
 * If we find a nice big extent or delalloc range for the bytes in the
 * file you want to defrag, we return 0 to let you know to skip this
 * part of the file
 */
1038
static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
C
Chris Mason 已提交
1039 1040 1041 1042 1043 1044 1045
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 end;

	read_lock(&em_tree->lock);
1046
	em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
C
Chris Mason 已提交
1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
	read_unlock(&em_tree->lock);

	if (em) {
		end = extent_map_end(em);
		free_extent_map(em);
		if (end - offset > thresh)
			return 0;
	}
	/* if we already have a nice delalloc here, just stop */
	thresh /= 2;
	end = count_range_bits(io_tree, &offset, offset + thresh,
			       thresh, EXTENT_DELALLOC, 1);
	if (end >= thresh)
		return 0;
	return 1;
}

/*
 * Helper for the defragging code: walk the file extent items of @inode that
 * btrfs_search_forward() yields for transid @newer_than, starting at file
 * offset *@off, and look for a regular extent shorter than @thresh that
 * check_defrag_in_cache() does not rule out.
 *
 * On a hit the extent's file offset is stored in *@off and 0 is returned.
 * Returns -ENOMEM when no path can be allocated and -ENOENT in every other
 * case (including a non-zero result from the search itself).
 */
static int find_new_extents(struct btrfs_root *root,
			    struct inode *inode, u64 newer_than,
			    u64 *off, u32 thresh)
{
	const u64 ino = btrfs_ino(BTRFS_I(inode));
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = -ENOENT;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = *off;

	while (btrfs_search_forward(root, &key, path, newer_than) == 0) {
		struct extent_buffer *leaf;

		/* examine every remaining slot of the leaf we landed on */
		for (;;) {
			struct btrfs_file_extent_item *fi;

			/* walked off this inode's file extent items */
			if (key.objectid != ino ||
			    key.type != BTRFS_EXTENT_DATA_KEY)
				goto out;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			if (btrfs_file_extent_type(leaf, fi) ==
				    BTRFS_FILE_EXTENT_REG &&
			    btrfs_file_extent_num_bytes(leaf, fi) < thresh &&
			    check_defrag_in_cache(inode, key.offset, thresh)) {
				*off = key.offset;
				ret = 0;
				goto out;
			}

			path->slots[0]++;
			if (path->slots[0] >= btrfs_header_nritems(leaf))
				break;
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		}

		/* leaf exhausted: search again just past the last key seen */
		if (key.offset == (u64)-1)
			break;

		key.offset++;
		btrfs_release_path(path);
	}
out:
	btrfs_free_path(path);
	return ret;
}

L
Li Zefan 已提交
1131
/*
 * Return the extent map covering the PAGE_SIZE range at @start of @inode.
 *
 * The inode's extent map tree is tried first under its read lock.  On a miss
 * the range is locked in the io tree and btrfs_get_extent() is asked for the
 * mapping.  Returns NULL if btrfs_get_extent() yields an error pointer; any
 * non-NULL result must be dropped with free_extent_map() by the caller.
 */
static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct extent_state *cached = NULL;
	const u64 len = PAGE_SIZE;
	const u64 end = start + len - 1;
	struct extent_map *em;

	/*
	 * hopefully we have this extent in the tree already, try without
	 * the full extent lock
	 */
	read_lock(&binode->extent_tree.lock);
	em = lookup_extent_mapping(&binode->extent_tree, start, len);
	read_unlock(&binode->extent_tree.lock);
	if (em)
		return em;

	/* get the big lock and read metadata off disk */
	lock_extent_bits(&binode->io_tree, start, end, &cached);
	em = btrfs_get_extent(binode, NULL, 0, start, len);
	unlock_extent_cached(&binode->io_tree, start, end, &cached);

	return IS_ERR(em) ? NULL : em;
}
1161

L
Li Zefan 已提交
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
{
	struct extent_map *next;
	bool ret = true;

	/* this is the last extent */
	if (em->start + em->len >= i_size_read(inode))
		return false;

	next = defrag_lookup_extent(inode, em->start + em->len);
1172 1173 1174
	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
		ret = false;
	else if ((em->block_start + em->block_len == next->block_start) &&
1175
		 (em->block_len > SZ_128K && next->block_len > SZ_128K))
L
Li Zefan 已提交
1176 1177 1178
		ret = false;

	free_extent_map(next);
1179 1180 1181
	return ret;
}

1182
/*
 * Decide whether the extent covering file offset @start should be defragged.
 *
 * Returns 1 to defrag and 0 to skip.  On the paths that look at an extent
 * map:
 *  - when returning 1, *@defrag_end is set to the end of that extent;
 *  - when returning 0, *@skip is set to the end of that extent and both
 *    *@last_len and *@defrag_end are reset to 0.
 * If no extent map can be looked up, 0 is returned with *@skip set to 0 and
 * the other outputs untouched.
 */
static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
			       u64 *last_len, u64 *skip, u64 *defrag_end,
			       int compress)
{
	struct extent_map *em;
	int defrag = 1;

	/*
	 * make sure that once we start defragging an extent, we keep on
	 * defragging it
	 */
	if (start < *defrag_end)
		return 1;

	*skip = 0;

	em = defrag_lookup_extent(inode, start);
	if (!em)
		return 0;

	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
		/* this will cover holes, and inline extents */
		defrag = 0;
	} else {
		const bool prev_mergeable = (*defrag_end != 0);
		const bool next_mergeable = defrag_check_next_extent(inode, em);
		const bool run_boundary = (*last_len == 0 ||
					   *last_len >= thresh);
		const bool poor_candidate = em->len >= thresh ||
				(!next_mergeable && !prev_mergeable);

		/*
		 * we hit a real extent, if it is big or the next extent is
		 * not a real extent, don't bother defragging it
		 */
		if (!compress && run_boundary && poor_candidate)
			defrag = 0;
	}

	/*
	 * last_len ends up being a counter of how many bytes we've defragged.
	 * every time we choose not to defrag an extent, we reset *last_len
	 * so that the next tiny extent will force a defrag.
	 *
	 * The end result of this is that tiny extents before a single big
	 * extent will force at least part of that big extent to be defragged.
	 */
	if (defrag) {
		*defrag_end = extent_map_end(em);
	} else {
		*last_len = 0;
		*skip = extent_map_end(em);
		*defrag_end = 0;
	}

	free_extent_map(em);
	return defrag;
}

C
Chris Mason 已提交
1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256
/*
 * it doesn't do much good to defrag one or two pages
 * at a time.  This pulls in a nice chunk of pages
 * to COW and defrag.
 *
 * It also makes sure the delalloc code has enough
 * dirty data to avoid making new small extents as part
 * of the defrag
 *
 * It's a good idea to start RA on this range
 * before calling this.
 */
static int cluster_pages_for_defrag(struct inode *inode,
				    struct page **pages,
				    unsigned long start_index,
1257
				    unsigned long num_pages)
C
Christoph Hellwig 已提交
1258
{
C
Chris Mason 已提交
1259 1260 1261 1262
	unsigned long file_end;
	u64 isize = i_size_read(inode);
	u64 page_start;
	u64 page_end;
1263
	u64 page_cnt;
C
Chris Mason 已提交
1264 1265 1266
	int ret;
	int i;
	int i_done;
1267
	struct btrfs_ordered_extent *ordered;
C
Chris Mason 已提交
1268
	struct extent_state *cached_state = NULL;
1269
	struct extent_io_tree *tree;
1270
	struct extent_changeset *data_reserved = NULL;
1271
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
C
Chris Mason 已提交
1272

1273
	file_end = (isize - 1) >> PAGE_SHIFT;
1274 1275 1276 1277
	if (!isize || start_index > file_end)
		return 0;

	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
C
Chris Mason 已提交
1278

1279
	ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
1280 1281
			start_index << PAGE_SHIFT,
			page_cnt << PAGE_SHIFT);
C
Chris Mason 已提交
1282 1283 1284
	if (ret)
		return ret;
	i_done = 0;
1285
	tree = &BTRFS_I(inode)->io_tree;
C
Chris Mason 已提交
1286 1287

	/* step one, lock all the pages */
1288
	for (i = 0; i < page_cnt; i++) {
C
Chris Mason 已提交
1289
		struct page *page;
1290
again:
1291
		page = find_or_create_page(inode->i_mapping,
1292
					   start_index + i, mask);
C
Chris Mason 已提交
1293 1294 1295
		if (!page)
			break;

1296
		page_start = page_offset(page);
1297
		page_end = page_start + PAGE_SIZE - 1;
1298
		while (1) {
1299
			lock_extent_bits(tree, page_start, page_end,
1300
					 &cached_state);
1301
			ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode),
1302
							      page_start);
1303
			unlock_extent_cached(tree, page_start, page_end,
1304
					     &cached_state);
1305 1306 1307 1308 1309 1310 1311
			if (!ordered)
				break;

			unlock_page(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			lock_page(page);
1312 1313 1314 1315 1316 1317
			/*
			 * we unlocked the page above, so we need check if
			 * it was released or not.
			 */
			if (page->mapping != inode->i_mapping) {
				unlock_page(page);
1318
				put_page(page);
1319 1320
				goto again;
			}
1321 1322
		}

C
Chris Mason 已提交
1323 1324 1325 1326 1327
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
1328
				put_page(page);
C
Chris Mason 已提交
1329 1330 1331 1332
				ret = -EIO;
				break;
			}
		}
1333 1334 1335

		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
1336
			put_page(page);
1337 1338 1339
			goto again;
		}

C
Chris Mason 已提交
1340 1341 1342 1343 1344 1345
		pages[i] = page;
		i_done++;
	}
	if (!i_done || ret)
		goto out;

1346
	if (!(inode->i_sb->s_flags & SB_ACTIVE))
C
Chris Mason 已提交
1347 1348 1349 1350 1351 1352 1353 1354 1355 1356
		goto out;

	/*
	 * so now we have a nice long stream of locked
	 * and up to date pages, lets wait on them
	 */
	for (i = 0; i < i_done; i++)
		wait_on_page_writeback(pages[i]);

	page_start = page_offset(pages[0]);
1357
	page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
C
Chris Mason 已提交
1358 1359

	lock_extent_bits(&BTRFS_I(inode)->io_tree,
1360
			 page_start, page_end - 1, &cached_state);
C
Chris Mason 已提交
1361
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
1362 1363
			  page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
			  EXTENT_DEFRAG, 0, 0, &cached_state);
C
Chris Mason 已提交
1364

1365
	if (i_done != page_cnt) {
1366
		spin_lock(&BTRFS_I(inode)->lock);
1367
		btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
1368
		spin_unlock(&BTRFS_I(inode)->lock);
1369
		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
1370
				start_index << PAGE_SHIFT,
1371
				(page_cnt - i_done) << PAGE_SHIFT, true);
C
Chris Mason 已提交
1372 1373 1374
	}


1375
	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
1376
			  &cached_state);
C
Chris Mason 已提交
1377 1378

	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1379
			     page_start, page_end - 1, &cached_state);
C
Chris Mason 已提交
1380 1381 1382 1383 1384 1385 1386

	for (i = 0; i < i_done; i++) {
		clear_page_dirty_for_io(pages[i]);
		ClearPageChecked(pages[i]);
		set_page_extent_mapped(pages[i]);
		set_page_dirty(pages[i]);
		unlock_page(pages[i]);
1387
		put_page(pages[i]);
C
Chris Mason 已提交
1388
	}
1389
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
1390
	extent_changeset_free(data_reserved);
C
Chris Mason 已提交
1391 1392 1393 1394
	return i_done;
out:
	for (i = 0; i < i_done; i++) {
		unlock_page(pages[i]);
1395
		put_page(pages[i]);
C
Chris Mason 已提交
1396
	}
1397
	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
1398
			start_index << PAGE_SHIFT,
1399
			page_cnt << PAGE_SHIFT, true);
1400
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
1401
	extent_changeset_free(data_reserved);
C
Chris Mason 已提交
1402 1403 1404 1405 1406 1407 1408 1409
	return ret;

}

/*
 * Defragment part of a file.
 *
 * @inode:         inode to defragment
 * @file:          open file whose readahead state is used; may be NULL, in
 *                 which case a private readahead state is allocated
 * @range:         range, flags, extent threshold and compression type;
 *                 range->start is updated while running when @newer_than is
 *                 set
 * @newer_than:    if non-zero, only look at extents that find_new_extents()
 *                 reports for this transid
 * @max_to_defrag: upper bound on the number of pages to defrag; 0 means up
 *                 to the last page of the range
 *
 * Returns the number of pages defragged, or a negative errno: -EINVAL for a
 * start offset beyond i_size or an unknown compression type, -ENOMEM,
 * -ETXTBSY for a swapfile, -EAGAIN when the defrag is cancelled, the error
 * from cluster_pages_for_defrag(), or the error from the initial
 * find_new_extents() call.
 */
int btrfs_defrag_file(struct inode *inode, struct file *file,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct file_ra_state *ra = NULL;
	unsigned long last_index;
	u64 isize = i_size_read(inode);
	u64 last_len = 0;
	u64 skip = 0;
	u64 defrag_end = 0;
	u64 newer_off = range->start;
	unsigned long i;
	unsigned long ra_index = 0;
	int ret;
	int defrag_count = 0;
	int compress_type = BTRFS_COMPRESS_ZLIB;
	u32 extent_thresh = range->extent_thresh;
	unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
	unsigned long cluster = max_cluster;
	u64 new_align = ~((u64)SZ_128K - 1);
	struct page **pages = NULL;
	bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;

	if (isize == 0)
		return 0;

	if (range->start >= isize)
		return -EINVAL;

	if (do_compress) {
		if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES)
			return -EINVAL;
		if (range->compress_type)
			compress_type = range->compress_type;
	}

	if (extent_thresh == 0)
		extent_thresh = SZ_256K;

	/*
	 * If we were not given a file, allocate a readahead context. As
	 * readahead is just an optimization, defrag will work without it so
	 * we don't error out.
	 */
	if (!file) {
		ra = kzalloc(sizeof(*ra), GFP_KERNEL);
		if (ra)
			file_ra_state_init(ra, inode->i_mapping);
	} else {
		ra = &file->f_ra;
	}

	pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		goto out_ra;
	}

	/* find the last page to defrag */
	if (range->start + range->len > range->start) {
		last_index = min_t(u64, isize - 1,
			 range->start + range->len - 1) >> PAGE_SHIFT;
	} else {
		/* len is 0 or start + len wrapped: run to the end of file */
		last_index = (isize - 1) >> PAGE_SHIFT;
	}

	if (newer_than) {
		ret = find_new_extents(root, inode, newer_than,
				       &newer_off, SZ_64K);
		if (!ret) {
			range->start = newer_off;
			/*
			 * we always align our defrag to help keep
			 * the extents in the file evenly spaced
			 */
			i = (newer_off & new_align) >> PAGE_SHIFT;
		} else
			goto out_ra;
	} else {
		i = range->start >> PAGE_SHIFT;
	}
	if (!max_to_defrag)
		max_to_defrag = last_index - i + 1;

	/*
	 * make writeback starts from i, so the defrag range can be
	 * written sequentially.
	 */
	if (i < inode->i_mapping->writeback_index)
		inode->i_mapping->writeback_index = i;

	while (i <= last_index && defrag_count < max_to_defrag &&
	       (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
		/*
		 * make sure we stop running if someone unmounts
		 * the FS
		 */
		if (!(inode->i_sb->s_flags & SB_ACTIVE))
			break;

		if (btrfs_defrag_cancelled(fs_info)) {
			btrfs_debug(fs_info, "defrag_file cancelled");
			/*
			 * Jump past the "ret = defrag_count" assignment so
			 * the caller actually sees -EAGAIN; the trailing
			 * flush and incompat handling still run.
			 */
			ret = -EAGAIN;
			goto out_flush;
		}

		if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
					 extent_thresh, &last_len, &skip,
					 &defrag_end, do_compress)) {
			unsigned long next;
			/*
			 * the should_defrag function tells us how much to skip
			 * bump our counter by the suggested amount
			 */
			next = DIV_ROUND_UP(skip, PAGE_SIZE);
			i = max(i + 1, next);
			continue;
		}

		if (!newer_than) {
			cluster = (PAGE_ALIGN(defrag_end) >>
				   PAGE_SHIFT) - i;
			cluster = min(cluster, max_cluster);
		} else {
			cluster = max_cluster;
		}

		if (i + cluster > ra_index) {
			ra_index = max(i, ra_index);
			if (ra)
				page_cache_sync_readahead(inode->i_mapping, ra,
						file, ra_index, cluster);
			ra_index += cluster;
		}

		inode_lock(inode);
		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
		} else {
			if (do_compress)
				BTRFS_I(inode)->defrag_compress = compress_type;
			ret = cluster_pages_for_defrag(inode, pages, i, cluster);
		}
		if (ret < 0) {
			inode_unlock(inode);
			goto out_ra;
		}

		defrag_count += ret;
		balance_dirty_pages_ratelimited(inode->i_mapping);
		inode_unlock(inode);

		if (newer_than) {
			if (newer_off == (u64)-1)
				break;

			if (ret > 0)
				i += ret;

			newer_off = max(newer_off + 1,
					(u64)i << PAGE_SHIFT);

			ret = find_new_extents(root, inode, newer_than,
					       &newer_off, SZ_64K);
			if (!ret) {
				range->start = newer_off;
				i = (newer_off & new_align) >> PAGE_SHIFT;
			} else {
				break;
			}
		} else {
			if (ret > 0) {
				i += ret;
				last_len += ret << PAGE_SHIFT;
			} else {
				i++;
				last_len = 0;
			}
		}
	}

	ret = defrag_count;

out_flush:
	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
		filemap_flush(inode->i_mapping);
		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
			filemap_flush(inode->i_mapping);
	}

	if (range->compress_type == BTRFS_COMPRESS_LZO) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
	}

out_ra:
	if (do_compress) {
		inode_lock(inode);
		BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
		inode_unlock(inode);
	}
	/* only a readahead state allocated here is ours to free */
	if (!file)
		kfree(ra);
	kfree(pages);
	return ret;
}

1616
static noinline int btrfs_ioctl_resize(struct file *file,
1617
					void __user *arg)
C
Christoph Hellwig 已提交
1618
{
1619 1620
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
C
Christoph Hellwig 已提交
1621 1622 1623
	u64 new_size;
	u64 old_size;
	u64 devid = 1;
1624
	struct btrfs_root *root = BTRFS_I(inode)->root;
C
Christoph Hellwig 已提交
1625 1626 1627 1628
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device = NULL;
	char *sizestr;
1629
	char *retptr;
C
Christoph Hellwig 已提交
1630 1631 1632 1633
	char *devstr = NULL;
	int ret = 0;
	int mod = 0;

1634 1635 1636
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

1637 1638 1639 1640
	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

1641
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
1642
		mnt_drop_write_file(file);
1643
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
1644 1645
	}

L
Li Zefan 已提交
1646
	vol_args = memdup_user(arg, sizeof(*vol_args));
1647 1648 1649 1650
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}
1651 1652

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
C
Christoph Hellwig 已提交
1653 1654 1655 1656 1657 1658 1659

	sizestr = vol_args->name;
	devstr = strchr(sizestr, ':');
	if (devstr) {
		sizestr = devstr + 1;
		*devstr = '\0';
		devstr = vol_args->name;
1660 1661 1662
		ret = kstrtoull(devstr, 10, &devid);
		if (ret)
			goto out_free;
1663 1664 1665 1666
		if (!devid) {
			ret = -EINVAL;
			goto out_free;
		}
1667
		btrfs_info(fs_info, "resizing devid %llu", devid);
C
Christoph Hellwig 已提交
1668
	}
M
Miao Xie 已提交
1669

1670
	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
C
Christoph Hellwig 已提交
1671
	if (!device) {
1672 1673
		btrfs_info(fs_info, "resizer unable to find device %llu",
			   devid);
1674
		ret = -ENODEV;
1675
		goto out_free;
C
Christoph Hellwig 已提交
1676
	}
M
Miao Xie 已提交
1677

1678
	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1679
		btrfs_info(fs_info,
1680
			   "resizer unable to apply on readonly device %llu",
1681
		       devid);
1682
		ret = -EPERM;
L
Liu Bo 已提交
1683 1684 1685
		goto out_free;
	}

C
Christoph Hellwig 已提交
1686 1687 1688 1689 1690 1691 1692 1693 1694 1695
	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
	else {
		if (sizestr[0] == '-') {
			mod = -1;
			sizestr++;
		} else if (sizestr[0] == '+') {
			mod = 1;
			sizestr++;
		}
1696 1697
		new_size = memparse(sizestr, &retptr);
		if (*retptr != '\0' || new_size == 0) {
C
Christoph Hellwig 已提交
1698
			ret = -EINVAL;
1699
			goto out_free;
C
Christoph Hellwig 已提交
1700 1701 1702
		}
	}

1703
	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1704
		ret = -EPERM;
1705 1706 1707
		goto out_free;
	}

1708
	old_size = btrfs_device_get_total_bytes(device);
C
Christoph Hellwig 已提交
1709 1710 1711 1712

	if (mod < 0) {
		if (new_size > old_size) {
			ret = -EINVAL;
1713
			goto out_free;
C
Christoph Hellwig 已提交
1714 1715 1716
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
1717
		if (new_size > ULLONG_MAX - old_size) {
1718
			ret = -ERANGE;
1719 1720
			goto out_free;
		}
C
Christoph Hellwig 已提交
1721 1722 1723
		new_size = old_size + new_size;
	}

1724
	if (new_size < SZ_256M) {
C
Christoph Hellwig 已提交
1725
		ret = -EINVAL;
1726
		goto out_free;
C
Christoph Hellwig 已提交
1727 1728 1729
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
1730
		goto out_free;
C
Christoph Hellwig 已提交
1731 1732
	}

1733
	new_size = round_down(new_size, fs_info->sectorsize);
C
Christoph Hellwig 已提交
1734 1735

	if (new_size > old_size) {
1736
		trans = btrfs_start_transaction(root, 0);
1737 1738
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
1739
			goto out_free;
1740
		}
C
Christoph Hellwig 已提交
1741
		ret = btrfs_grow_device(trans, device, new_size);
1742
		btrfs_commit_transaction(trans);
1743
	} else if (new_size < old_size) {
C
Christoph Hellwig 已提交
1744
		ret = btrfs_shrink_device(device, new_size);
1745
	} /* equal, nothing need to do */
C
Christoph Hellwig 已提交
1746

1747 1748 1749 1750 1751
	if (ret == 0 && new_size != old_size)
		btrfs_info_in_rcu(fs_info,
			"resize device %s (devid %llu) from %llu to %llu",
			rcu_str_deref(device->name), device->devid,
			old_size, new_size);
1752
out_free:
C
Christoph Hellwig 已提交
1753
	kfree(vol_args);
1754
out:
1755
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
1756
	mnt_drop_write_file(file);
C
Christoph Hellwig 已提交
1757 1758 1759
	return ret;
}

1760
static noinline int __btrfs_ioctl_snap_create(struct file *file,
1761
				const char *name, unsigned long fd, int subvol,
1762
				bool readonly,
1763
				struct btrfs_qgroup_inherit *inherit)
C
Christoph Hellwig 已提交
1764 1765
{
	int namelen;
1766
	int ret = 0;
C
Christoph Hellwig 已提交
1767

1768 1769 1770
	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

1771 1772 1773 1774
	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

S
Sage Weil 已提交
1775 1776
	namelen = strlen(name);
	if (strchr(name, '/')) {
C
Christoph Hellwig 已提交
1777
		ret = -EINVAL;
1778
		goto out_drop_write;
C
Christoph Hellwig 已提交
1779 1780
	}

1781 1782 1783
	if (name[0] == '.' &&
	   (namelen == 1 || (name[1] == '.' && namelen == 2))) {
		ret = -EEXIST;
1784
		goto out_drop_write;
1785 1786
	}

1787
	if (subvol) {
S
Sage Weil 已提交
1788
		ret = btrfs_mksubvol(&file->f_path, name, namelen,
1789
				     NULL, readonly, inherit);
1790
	} else {
1791
		struct fd src = fdget(fd);
1792
		struct inode *src_inode;
1793
		if (!src.file) {
1794
			ret = -EINVAL;
1795
			goto out_drop_write;
1796 1797
		}

A
Al Viro 已提交
1798 1799
		src_inode = file_inode(src.file);
		if (src_inode->i_sb != file_inode(file)->i_sb) {
J
Josef Bacik 已提交
1800
			btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
1801
				   "Snapshot src from another FS");
1802
			ret = -EXDEV;
1803 1804 1805 1806 1807 1808
		} else if (!inode_owner_or_capable(src_inode)) {
			/*
			 * Subvolume creation is not restricted, but snapshots
			 * are limited to own subvolumes only
			 */
			ret = -EPERM;
1809
		} else {
1810
			ret = btrfs_mksnapshot(&file->f_path, name, namelen,
1811
					     BTRFS_I(src_inode)->root,
1812
					     readonly, inherit);
1813
		}
1814
		fdput(src);
1815
	}
1816 1817
out_drop_write:
	mnt_drop_write_file(file);
C
Christoph Hellwig 已提交
1818
out:
S
Sage Weil 已提交
1819 1820 1821 1822
	return ret;
}

static noinline int btrfs_ioctl_snap_create(struct file *file,
1823
					    void __user *arg, int subvol)
S
Sage Weil 已提交
1824
{
1825
	struct btrfs_ioctl_vol_args *vol_args;
S
Sage Weil 已提交
1826 1827
	int ret;

1828 1829 1830
	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

1831 1832 1833 1834
	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
S
Sage Weil 已提交
1835

1836 1837
	ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
					subvol, false, NULL);
1838

1839 1840 1841
	kfree(vol_args);
	return ret;
}
1842

1843 1844 1845 1846 1847
/*
 * Subvolume/snapshot creation taking a struct btrfs_ioctl_vol_args_v2.
 *
 * In addition to the v1 behaviour this honours BTRFS_SUBVOL_RDONLY and, with
 * BTRFS_SUBVOL_QGROUP_INHERIT, copies vol_args->size bytes of qgroup
 * inheritance data from vol_args->qgroup_inherit.
 *
 * Returns -ENOTDIR if @file is not a directory, -EOPNOTSUPP for flags
 * outside BTRFS_SUBVOL_CREATE_ARGS_MASK, -EINVAL for an inherit size that is
 * smaller than struct btrfs_qgroup_inherit or larger than PAGE_SIZE, a
 * memdup_user() error, or the result of __btrfs_ioctl_snap_create().
 */
static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
					       void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args_v2 *vol_args;
	int ret;
	bool readonly = false;
	struct btrfs_qgroup_inherit *inherit = NULL;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';

	if (vol_args->flags & ~BTRFS_SUBVOL_CREATE_ARGS_MASK) {
		ret = -EOPNOTSUPP;
		goto free_args;
	}

	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
		readonly = true;
	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
		/*
		 * The copy is handed on as a struct btrfs_qgroup_inherit, so
		 * the allocation must cover at least the fixed part of that
		 * struct; the upper bound caps the user-controlled
		 * allocation size.
		 */
		if (vol_args->size < sizeof(*inherit) ||
		    vol_args->size > PAGE_SIZE) {
			ret = -EINVAL;
			goto free_args;
		}
		inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
		if (IS_ERR(inherit)) {
			ret = PTR_ERR(inherit);
			goto free_args;
		}
	}

	ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
					subvol, readonly, inherit);

	/* inherit is NULL here unless it was copied above */
	kfree(inherit);
free_args:
	kfree(vol_args);
	return ret;
}

1889 1890 1891
/*
 * Report the flags of the subvolume that @file is the top directory of.
 *
 * Only inodes numbered BTRFS_FIRST_FREE_OBJECTID are accepted (-EINVAL
 * otherwise).  A u64 is written to @arg with BTRFS_SUBVOL_RDONLY set if the
 * root is read-only, sampled under the read side of fs_info->subvol_sem.
 * Returns -EFAULT if the copy to userspace fails, 0 otherwise.
 */
static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 out_flags = 0;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
		return -EINVAL;

	down_read(&fs_info->subvol_sem);
	if (btrfs_root_readonly(root))
		out_flags |= BTRFS_SUBVOL_RDONLY;
	up_read(&fs_info->subvol_sem);

	if (copy_to_user(arg, &out_flags, sizeof(out_flags)))
		return -EFAULT;

	return 0;
}

/*
 * Set or clear the read-only flag of the subvolume that @file is the top
 * directory of.
 *
 * @arg points to a u64; only BTRFS_SUBVOL_RDONLY may be set in it
 * (-EOPNOTSUPP otherwise).  The caller must pass inode_owner_or_capable()
 * (-EPERM) and the inode must be numbered BTRFS_FIRST_FREE_OBJECTID
 * (-EINVAL).  Switching to read-write is refused with -EPERM while
 * root->send_in_progress is non-zero.
 *
 * The new flags are written to the in-memory root item first and then
 * committed; if starting the transaction, updating the root or committing
 * fails, the in-memory flags are restored and the error is returned.
 */
static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
					      void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	u64 saved_flags;
	u64 flags;
	bool want_ro;
	int ret;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto drop_write;
	}

	if (copy_from_user(&flags, arg, sizeof(flags))) {
		ret = -EFAULT;
		goto drop_write;
	}

	if (flags & ~BTRFS_SUBVOL_RDONLY) {
		ret = -EOPNOTSUPP;
		goto drop_write;
	}
	want_ro = (flags & BTRFS_SUBVOL_RDONLY) != 0;

	down_write(&fs_info->subvol_sem);

	/* nothing to do */
	if (want_ro == btrfs_root_readonly(root))
		goto drop_sem;

	saved_flags = btrfs_root_flags(&root->root_item);
	if (want_ro) {
		btrfs_set_root_flags(&root->root_item,
				     saved_flags | BTRFS_ROOT_SUBVOL_RDONLY);
	} else {
		bool sending;

		/*
		 * Block RO -> RW transition if this subvolume is involved in
		 * send
		 */
		spin_lock(&root->root_item_lock);
		sending = (root->send_in_progress != 0);
		if (!sending)
			btrfs_set_root_flags(&root->root_item,
				     saved_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
		spin_unlock(&root->root_item_lock);

		if (sending) {
			btrfs_warn(fs_info,
				   "Attempt to set subvolume %llu read-write during send",
				   root->root_key.objectid);
			ret = -EPERM;
			goto drop_sem;
		}
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
	} else {
		ret = btrfs_update_root(trans, fs_info->tree_root,
					&root->root_key, &root->root_item);
		if (ret < 0)
			btrfs_end_transaction(trans);
		else
			ret = btrfs_commit_transaction(trans);
	}

	/* roll the in-memory flags back if they never reached disk */
	if (ret)
		btrfs_set_root_flags(&root->root_item, saved_flags);
drop_sem:
	up_write(&fs_info->subvol_sem);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

2001 2002 2003
static noinline int key_in_sk(struct btrfs_key *key,
			      struct btrfs_ioctl_search_key *sk)
{
2004 2005 2006 2007 2008 2009 2010 2011 2012
	struct btrfs_key test;
	int ret;

	test.objectid = sk->min_objectid;
	test.type = sk->min_type;
	test.offset = sk->min_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret < 0)
2013
		return 0;
2014 2015 2016 2017 2018 2019 2020

	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret > 0)
2021 2022 2023 2024
		return 0;
	return 1;
}

2025
static noinline int copy_to_sk(struct btrfs_path *path,
2026 2027
			       struct btrfs_key *key,
			       struct btrfs_ioctl_search_key *sk,
2028
			       size_t *buf_size,
2029
			       char __user *ubuf,
2030 2031 2032 2033 2034 2035
			       unsigned long *sk_offset,
			       int *num_found)
{
	u64 found_transid;
	struct extent_buffer *leaf;
	struct btrfs_ioctl_search_header sh;
2036
	struct btrfs_key test;
2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057
	unsigned long item_off;
	unsigned long item_len;
	int nritems;
	int i;
	int slot;
	int ret = 0;

	leaf = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(leaf);

	if (btrfs_header_generation(leaf) > sk->max_transid) {
		i = nritems;
		goto advance_key;
	}
	found_transid = btrfs_header_generation(leaf);

	for (i = slot; i < nritems; i++) {
		item_off = btrfs_item_ptr_offset(leaf, i);
		item_len = btrfs_item_size_nr(leaf, i);

2058 2059 2060 2061
		btrfs_item_key_to_cpu(leaf, key, i);
		if (!key_in_sk(key, sk))
			continue;

2062
		if (sizeof(sh) + item_len > *buf_size) {
2063 2064 2065 2066 2067 2068 2069 2070 2071 2072
			if (*num_found) {
				ret = 1;
				goto out;
			}

			/*
			 * return one empty item back for v1, which does not
			 * handle -EOVERFLOW
			 */

2073
			*buf_size = sizeof(sh) + item_len;
2074
			item_len = 0;
2075 2076
			ret = -EOVERFLOW;
		}
2077

2078
		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
2079
			ret = 1;
2080
			goto out;
2081 2082 2083 2084 2085 2086 2087 2088
		}

		sh.objectid = key->objectid;
		sh.offset = key->offset;
		sh.type = key->type;
		sh.len = item_len;
		sh.transid = found_transid;

2089 2090 2091 2092 2093 2094 2095 2096
		/*
		 * Copy search result header. If we fault then loop again so we
		 * can fault in the pages and -EFAULT there if there's a
		 * problem. Otherwise we'll fault and then copy the buffer in
		 * properly this next time through
		 */
		if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) {
			ret = 0;
2097 2098 2099
			goto out;
		}

2100 2101 2102
		*sk_offset += sizeof(sh);

		if (item_len) {
2103
			char __user *up = ubuf + *sk_offset;
2104 2105 2106 2107 2108 2109 2110 2111
			/*
			 * Copy the item, same behavior as above, but reset the
			 * * sk_offset so we copy the full thing again.
			 */
			if (read_extent_buffer_to_user_nofault(leaf, up,
						item_off, item_len)) {
				ret = 0;
				*sk_offset -= sizeof(sh);
2112 2113 2114
				goto out;
			}

2115 2116
			*sk_offset += item_len;
		}
2117
		(*num_found)++;
2118

2119 2120 2121
		if (ret) /* -EOVERFLOW from above */
			goto out;

2122 2123 2124 2125
		if (*num_found >= sk->nr_items) {
			ret = 1;
			goto out;
		}
2126 2127
	}
advance_key:
2128
	ret = 0;
2129 2130 2131 2132 2133 2134
	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;
	if (btrfs_comp_cpu_keys(key, &test) >= 0)
		ret = 1;
	else if (key->offset < (u64)-1)
2135
		key->offset++;
2136
	else if (key->type < (u8)-1) {
2137
		key->offset = 0;
2138
		key->type++;
2139
	} else if (key->objectid < (u64)-1) {
2140 2141
		key->offset = 0;
		key->type = 0;
2142
		key->objectid++;
2143 2144
	} else
		ret = 1;
2145
out:
2146 2147 2148 2149 2150 2151 2152 2153 2154
	/*
	 *  0: all items from this leaf copied, continue with next
	 *  1: * more items can be copied, but unused buffer is too small
	 *     * all items were found
	 *     Either way, it will stops the loop which iterates to the next
	 *     leaf
	 *  -EOVERFLOW: item was to large for buffer
	 *  -EFAULT: could not copy extent buffer back to userspace
	 */
2155 2156 2157 2158
	return ret;
}

static noinline int search_ioctl(struct inode *inode,
2159
				 struct btrfs_ioctl_search_key *sk,
2160
				 size_t *buf_size,
2161
				 char __user *ubuf)
2162
{
2163
	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
2164 2165 2166 2167 2168 2169 2170
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_path *path;
	int ret;
	int num_found = 0;
	unsigned long sk_offset = 0;

2171 2172
	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
		*buf_size = sizeof(struct btrfs_ioctl_search_header);
2173
		return -EOVERFLOW;
2174
	}
2175

2176 2177 2178 2179 2180 2181
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (sk->tree_id == 0) {
		/* search the root of the inode that was passed */
2182
		root = btrfs_grab_root(BTRFS_I(inode)->root);
2183
	} else {
D
David Sterba 已提交
2184
		root = btrfs_get_fs_root(info, sk->tree_id, true);
2185 2186
		if (IS_ERR(root)) {
			btrfs_free_path(path);
2187
			return PTR_ERR(root);
2188 2189 2190 2191 2192 2193 2194
		}
	}

	key.objectid = sk->min_objectid;
	key.type = sk->min_type;
	key.offset = sk->min_offset;

2195
	while (1) {
2196 2197
		ret = fault_in_pages_writeable(ubuf + sk_offset,
					       *buf_size - sk_offset);
2198 2199 2200
		if (ret)
			break;

2201
		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
2202 2203 2204 2205 2206
		if (ret != 0) {
			if (ret > 0)
				ret = 0;
			goto err;
		}
2207
		ret = copy_to_sk(path, &key, sk, buf_size, ubuf,
2208
				 &sk_offset, &num_found);
2209
		btrfs_release_path(path);
2210
		if (ret)
2211 2212 2213
			break;

	}
2214 2215
	if (ret > 0)
		ret = 0;
2216 2217
err:
	sk->nr_items = num_found;
2218
	btrfs_put_root(root);
2219 2220 2221 2222 2223 2224 2225
	btrfs_free_path(path);
	return ret;
}

/*
 * Tree search ioctl (v1): the result buffer is the fixed size array embedded
 * in struct btrfs_ioctl_search_args.  Requires CAP_SYS_ADMIN.
 *
 * Returns 0 on success (the updated search key is copied back to user space)
 * or a negative errno.
 */
static noinline int btrfs_ioctl_tree_search(struct file *file,
					   void __user *argp)
{
	struct btrfs_ioctl_search_args __user *user_args = argp;
	struct btrfs_ioctl_search_key search_key;
	size_t space = sizeof(user_args->buf);
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&search_key, &user_args->key, sizeof(search_key)))
		return -EFAULT;

	err = search_ioctl(file_inode(file), &search_key, &space,
			   user_args->buf);

	/*
	 * In the original implementation an overflow is reported by returning
	 * a search header with a len of zero, so -EOVERFLOW counts as success
	 * here.
	 */
	if (err && err != -EOVERFLOW)
		return err;

	if (copy_to_user(&user_args->key, &search_key, sizeof(search_key)))
		return -EFAULT;

	return 0;
}

G
Gerhard Heift 已提交
2257 2258 2259 2260 2261 2262 2263 2264
/*
 * Tree search ioctl (v2): the caller supplies the size of the result buffer
 * that follows the arguments.  Requires CAP_SYS_ADMIN.
 *
 * At most 16MiB of the user buffer is used.  On success the updated search
 * key is copied back to user space; on -EOVERFLOW the buffer size needed for
 * the item that did not fit is written to the user's buf_size field.
 *
 * Returns 0 on success or a negative errno.
 */
static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
					       void __user *argp)
{
	struct btrfs_ioctl_search_args_v2 __user *uarg;
	struct btrfs_ioctl_search_args_v2 args;
	struct inode *inode;
	int ret;
	size_t buf_size;
	const size_t buf_limit = SZ_16M;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* copy search header and buffer size */
	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
	if (copy_from_user(&args, uarg, sizeof(args)))
		return -EFAULT;

	/*
	 * Limit result size to 16MB.  Compare in the type of the user visible
	 * field before narrowing to size_t, so a huge request is clamped
	 * instead of being truncated where size_t is narrower than the field.
	 */
	if (args.buf_size > buf_limit)
		buf_size = buf_limit;
	else
		buf_size = args.buf_size;

	inode = file_inode(file);
	ret = search_ioctl(inode, &args.key, &buf_size,
			   (char __user *)(&uarg->buf[0]));
	if (ret == 0) {
		if (copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
			ret = -EFAULT;
	} else if (ret == -EOVERFLOW) {
		/*
		 * Report the required size through a variable that has the
		 * same type as the destination field; copying sizeof(size_t)
		 * bytes out of a size_t would update only part of a wider
		 * field.
		 */
		args.buf_size = buf_size;
		if (copy_to_user(&uarg->buf_size, &args.buf_size,
				 sizeof(args.buf_size)))
			ret = -EFAULT;
	}

	return ret;
}

2293
/*
2294 2295 2296
 * Search INODE_REFs to identify path name of 'dirid' directory
 * in a 'tree_id' tree. and sets path name to 'name'.
 */
2297 2298 2299 2300 2301
static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
				u64 tree_id, u64 dirid, char *name)
{
	struct btrfs_root *root;
	struct btrfs_key key;
2302
	char *ptr;
2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319
	int ret = -1;
	int slot;
	int len;
	int total_len = 0;
	struct btrfs_inode_ref *iref;
	struct extent_buffer *l;
	struct btrfs_path *path;

	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
		name[0]='\0';
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

2320
	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
2321

D
David Sterba 已提交
2322
	root = btrfs_get_fs_root(info, tree_id, true);
2323
	if (IS_ERR(root)) {
2324
		ret = PTR_ERR(root);
2325 2326 2327
		root = NULL;
		goto out;
	}
2328 2329 2330

	key.objectid = dirid;
	key.type = BTRFS_INODE_REF_KEY;
2331
	key.offset = (u64)-1;
2332

2333
	while (1) {
2334 2335 2336
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
2337 2338 2339 2340 2341 2342 2343 2344 2345 2346
		else if (ret > 0) {
			ret = btrfs_previous_item(root, path, dirid,
						  BTRFS_INODE_REF_KEY);
			if (ret < 0)
				goto out;
			else if (ret > 0) {
				ret = -ENOENT;
				goto out;
			}
		}
2347 2348 2349 2350 2351 2352 2353 2354 2355

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, slot);

		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(l, iref);
		ptr -= len + 1;
		total_len += len + 1;
2356 2357
		if (ptr < name) {
			ret = -ENAMETOOLONG;
2358
			goto out;
2359
		}
2360 2361

		*(ptr + len) = '/';
2362
		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
2363 2364 2365 2366

		if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
			break;

2367
		btrfs_release_path(path);
2368
		key.objectid = key.offset;
2369
		key.offset = (u64)-1;
2370 2371
		dirid = key.objectid;
	}
2372
	memmove(name, ptr, total_len);
2373
	name[total_len] = '\0';
2374 2375
	ret = 0;
out:
2376
	btrfs_put_root(root);
2377
	btrfs_free_path(path);
2378 2379 2380
	return ret;
}

2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392
static int btrfs_search_path_in_tree_user(struct inode *inode,
				struct btrfs_ioctl_ino_lookup_user_args *args)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct super_block *sb = inode->i_sb;
	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
	u64 dirid = args->dirid;
	unsigned long item_off;
	unsigned long item_len;
	struct btrfs_inode_ref *iref;
	struct btrfs_root_ref *rref;
2393
	struct btrfs_root *root = NULL;
2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414
	struct btrfs_path *path;
	struct btrfs_key key, key2;
	struct extent_buffer *leaf;
	struct inode *temp_inode;
	char *ptr;
	int slot;
	int len;
	int total_len = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * If the bottom subvolume does not exist directly under upper_limit,
	 * construct the path in from the bottom up.
	 */
	if (dirid != upper_limit.objectid) {
		ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];

D
David Sterba 已提交
2415
		root = btrfs_get_fs_root(fs_info, treeid, true);
2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426
		if (IS_ERR(root)) {
			ret = PTR_ERR(root);
			goto out;
		}

		key.objectid = dirid;
		key.type = BTRFS_INODE_REF_KEY;
		key.offset = (u64)-1;
		while (1) {
			ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
			if (ret < 0) {
2427
				goto out_put;
2428 2429 2430 2431
			} else if (ret > 0) {
				ret = btrfs_previous_item(root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
2432
					goto out_put;
2433 2434
				} else if (ret > 0) {
					ret = -ENOENT;
2435
					goto out_put;
2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448
				}
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key, slot);

			iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(leaf, iref);
			ptr -= len + 1;
			total_len += len + 1;
			if (ptr < args->path) {
				ret = -ENAMETOOLONG;
2449
				goto out_put;
2450 2451 2452 2453 2454 2455 2456 2457 2458 2459
			}

			*(ptr + len) = '/';
			read_extent_buffer(leaf, ptr,
					(unsigned long)(iref + 1), len);

			/* Check the read+exec permission of this directory */
			ret = btrfs_previous_item(root, path, dirid,
						  BTRFS_INODE_ITEM_KEY);
			if (ret < 0) {
2460
				goto out_put;
2461 2462
			} else if (ret > 0) {
				ret = -ENOENT;
2463
				goto out_put;
2464 2465 2466 2467 2468 2469 2470
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key2, slot);
			if (key2.objectid != dirid) {
				ret = -ENOENT;
2471
				goto out_put;
2472 2473
			}

D
David Sterba 已提交
2474
			temp_inode = btrfs_iget(sb, key2.objectid, root);
2475 2476
			if (IS_ERR(temp_inode)) {
				ret = PTR_ERR(temp_inode);
2477
				goto out_put;
2478
			}
2479 2480 2481 2482
			ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
			iput(temp_inode);
			if (ret) {
				ret = -EACCES;
2483
				goto out_put;
2484 2485 2486 2487 2488 2489
			}

			if (key.offset == upper_limit.objectid)
				break;
			if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
				ret = -EACCES;
2490
				goto out_put;
2491 2492 2493 2494 2495 2496 2497 2498 2499 2500
			}

			btrfs_release_path(path);
			key.objectid = key.offset;
			key.offset = (u64)-1;
			dirid = key.objectid;
		}

		memmove(args->path, ptr, total_len);
		args->path[total_len] = '\0';
2501
		btrfs_put_root(root);
2502
		root = NULL;
2503 2504 2505 2506 2507 2508 2509
		btrfs_release_path(path);
	}

	/* Get the bottom subvolume's name from ROOT_REF */
	key.objectid = treeid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = args->treeid;
2510
	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	slot = path->slots[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);

	item_off = btrfs_item_ptr_offset(leaf, slot);
	item_len = btrfs_item_size_nr(leaf, slot);
	/* Check if dirid in ROOT_REF corresponds to passed dirid */
	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
	if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
		ret = -EINVAL;
		goto out;
	}

	/* Copy subvolume's name */
	item_off += sizeof(struct btrfs_root_ref);
	item_len -= sizeof(struct btrfs_root_ref);
	read_extent_buffer(leaf, args->name, item_off, item_len);
	args->name[item_len] = 0;

2537
out_put:
2538
	btrfs_put_root(root);
2539 2540 2541 2542 2543
out:
	btrfs_free_path(path);
	return ret;
}

2544 2545 2546
/*
 * Ino lookup ioctl: resolve the path of directory args->objectid inside tree
 * args->treeid (0 selects the tree the file belongs to) into args->name.
 *
 * Querying BTRFS_FIRST_FREE_OBJECTID needs no privileges, every other lookup
 * requires CAP_SYS_ADMIN.
 *
 * Returns 0 on success or a negative errno.
 */
static noinline int btrfs_ioctl_ino_lookup(struct file *file,
					   void __user *argp)
{
	struct btrfs_ioctl_ino_lookup_args *args;
	struct btrfs_root *root;
	int ret;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	root = BTRFS_I(file_inode(file))->root;

	/*
	 * Unprivileged query to obtain the containing subvolume root id. The
	 * path is reset so it's consistent with btrfs_search_path_in_tree.
	 */
	if (args->treeid == 0)
		args->treeid = root->root_key.objectid;

	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
		args->name[0] = 0;
		ret = 0;
	} else if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
	} else {
		ret = btrfs_search_path_in_tree(root->fs_info,
						args->treeid, args->objectid,
						args->name);
	}

	/* Only hand the result back when the lookup itself succeeded. */
	if (!ret && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;

	kfree(args);
	return ret;
}

2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628
/*
 * Unprivileged version of the ino_lookup ioctl.
 *
 * Differences from the ino_lookup ioctl:
 *
 *   1. While the path is constructed, Read + Exec permission is checked with
 *      inode_permission(). -EACCES is returned if the check fails.
 *   2. Path construction stops at the inode number that corresponds to the fd
 *      this ioctl is called with. If the constructed path does not exist
 *      under the fd's inode, -EACCES is returned.
 *   3. The name of the bottom subvolume is searched and filled in as well.
 */
static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
{
	struct inode *inode = file_inode(file);
	struct btrfs_ioctl_ino_lookup_user_args *args;
	int ret;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	/*
	 * The subvolume does not exist under fd with which this is called
	 */
	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
	    BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EACCES;
		goto free_args;
	}

	ret = btrfs_search_path_in_tree_user(inode, args);
	if (!ret && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;

free_args:
	kfree(args);
	return ret;
}

2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660
/*
 * Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF
 *
 * Fills a struct btrfs_ioctl_get_subvol_info_args for the subvolume that
 * contains the file: ids, flags, uuids, transaction ids and timestamps come
 * from the in-memory root item; parent id, dirid and name come from the
 * ROOT_BACKREF item in the root tree (not looked up for the top-level
 * BTRFS_FS_TREE_OBJECTID tree).
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root_item *root_item;
	struct btrfs_root_ref *rref;
	struct extent_buffer *leaf;
	unsigned long item_off;
	unsigned long item_len;
	struct inode *inode;
	int slot;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Zeroed, so fields that are not filled in below read as 0. */
	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
	if (!subvol_info) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	inode = file_inode(file);
	fs_info = BTRFS_I(inode)->root->fs_info;

	/* Get root_item of inode's subvolume */
	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
	root = btrfs_get_fs_root(fs_info, key.objectid, true);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out_free;
	}
	root_item = &root->root_item;

	subvol_info->treeid = key.objectid;

	subvol_info->generation = btrfs_root_generation(root_item);
	subvol_info->flags = btrfs_root_flags(root_item);

	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
						    BTRFS_UUID_SIZE);
	memcpy(subvol_info->received_uuid, root_item->received_uuid,
						    BTRFS_UUID_SIZE);

	subvol_info->ctransid = btrfs_root_ctransid(root_item);
	subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);

	subvol_info->otransid = btrfs_root_otransid(root_item);
	subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);

	subvol_info->stransid = btrfs_root_stransid(root_item);
	subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);

	subvol_info->rtransid = btrfs_root_rtransid(root_item);
	subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);

	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
		/* Search root tree for ROOT_BACKREF of this subvolume */
		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = 0;
		ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
		if (ret < 0) {
			goto out;
		} else if (path->slots[0] >=
			   btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(fs_info->tree_root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = -EUCLEAN;
				goto out;
			}
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == subvol_info->treeid &&
		    key.type == BTRFS_ROOT_BACKREF_KEY) {
			subvol_info->parent_id = key.offset;

			rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
			subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);

			/* The name is stored right behind the root ref. */
			item_off = btrfs_item_ptr_offset(leaf, slot)
					+ sizeof(struct btrfs_root_ref);
			item_len = btrfs_item_size_nr(leaf, slot)
					- sizeof(struct btrfs_root_ref);
			read_extent_buffer(leaf, subvol_info->name,
					   item_off, item_len);
		} else {
			ret = -ENOENT;
			goto out;
		}

		/*
		 * btrfs_search_slot() leaves a positive value in ret when the
		 * exact key is absent, which is expected for the offset 0
		 * search key above.  The backref was found, so do not let
		 * that value leak out as the ioctl result.
		 */
		ret = 0;
	}

	/*
	 * Everything needed was copied out of the leaf.  Drop the path before
	 * touching user memory so a page fault in copy_to_user() is not taken
	 * while a root tree leaf is still held.
	 */
	btrfs_release_path(path);

	if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
		ret = -EFAULT;

out:
	btrfs_put_root(root);
out_free:
	btrfs_free_path(path);
	kfree(subvol_info);
	return ret;
}

2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842
/*
 * Return ROOT_REF information of the subvolume containing this inode
 * except the subvolume name.
 *
 * Starting at rootrefs->min_treeid, up to BTRFS_MAX_ROOTREF_BUFFER_NUM
 * (treeid, dirid) pairs are collected from the root tree.  When more entries
 * exist than fit, -EOVERFLOW is returned together with the entries found so
 * far and min_treeid is advanced, so the caller can repeat the call to get
 * the rest.
 *
 * Returns 0 on success, -EOVERFLOW as described above, or another negative
 * errno.
 */
static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
	struct btrfs_root_ref *rref;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct inode *inode;
	u64 objectid;
	int slot;
	int ret;
	u8 found;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	rootrefs = memdup_user(argp, sizeof(*rootrefs));
	if (IS_ERR(rootrefs)) {
		btrfs_free_path(path);
		return PTR_ERR(rootrefs);
	}

	inode = file_inode(file);
	root = BTRFS_I(inode)->root->fs_info->tree_root;
	objectid = BTRFS_I(inode)->root->root_key.objectid;

	key.objectid = objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = rootrefs->min_treeid;
	found = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (path->slots[0] >=
		   btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0) {
			goto out;
		} else if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &key, slot);
		/* Walked past the ROOT_REF items of this subvolume: done. */
		if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) {
			ret = 0;
			goto out;
		}

		/* Another entry exists but the result array is full. */
		if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
			ret = -EOVERFLOW;
			goto out;
		}

		rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
		rootrefs->rootref[found].treeid = key.offset;
		rootrefs->rootref[found].dirid =
				  btrfs_root_ref_dirid(leaf, rref);
		found++;

		ret = btrfs_next_item(root, path);
		if (ret < 0) {
			goto out;
		} else if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}

out:
	/*
	 * Nothing below reads the tree any more.  Free the path before
	 * touching user memory so a page fault in copy_to_user() is not taken
	 * while a root tree leaf is still held.
	 */
	btrfs_free_path(path);

	if (!ret || ret == -EOVERFLOW) {
		rootrefs->num_items = found;
		/* update min_treeid for next search */
		if (found)
			rootrefs->min_treeid =
				rootrefs->rootref[found - 1].treeid + 1;
		if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
			ret = -EFAULT;
	}

	kfree(rootrefs);

	return ret;
}

2843
static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2844 2845
					     void __user *arg,
					     bool destroy_v2)
2846
{
A
Al Viro 已提交
2847
	struct dentry *parent = file->f_path.dentry;
2848
	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
2849
	struct dentry *dentry;
2850
	struct inode *dir = d_inode(parent);
2851 2852 2853
	struct inode *inode;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *dest = NULL;
2854 2855 2856 2857
	struct btrfs_ioctl_vol_args *vol_args = NULL;
	struct btrfs_ioctl_vol_args_v2 *vol_args2 = NULL;
	char *subvol_name, *subvol_name_ptr = NULL;
	int subvol_namelen;
2858
	int err = 0;
2859
	bool destroy_parent = false;
2860

2861 2862 2863 2864
	if (destroy_v2) {
		vol_args2 = memdup_user(arg, sizeof(*vol_args2));
		if (IS_ERR(vol_args2))
			return PTR_ERR(vol_args2);
2865

2866 2867 2868 2869
		if (vol_args2->flags & ~BTRFS_SUBVOL_DELETE_ARGS_MASK) {
			err = -EOPNOTSUPP;
			goto out;
		}
2870

2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948
		/*
		 * If SPEC_BY_ID is not set, we are looking for the subvolume by
		 * name, same as v1 currently does.
		 */
		if (!(vol_args2->flags & BTRFS_SUBVOL_SPEC_BY_ID)) {
			vol_args2->name[BTRFS_SUBVOL_NAME_MAX] = 0;
			subvol_name = vol_args2->name;

			err = mnt_want_write_file(file);
			if (err)
				goto out;
		} else {
			if (vol_args2->subvolid < BTRFS_FIRST_FREE_OBJECTID) {
				err = -EINVAL;
				goto out;
			}

			err = mnt_want_write_file(file);
			if (err)
				goto out;

			dentry = btrfs_get_dentry(fs_info->sb,
					BTRFS_FIRST_FREE_OBJECTID,
					vol_args2->subvolid, 0, 0);
			if (IS_ERR(dentry)) {
				err = PTR_ERR(dentry);
				goto out_drop_write;
			}

			/*
			 * Change the default parent since the subvolume being
			 * deleted can be outside of the current mount point.
			 */
			parent = btrfs_get_parent(dentry);

			/*
			 * At this point dentry->d_name can point to '/' if the
			 * subvolume we want to destroy is outsite of the
			 * current mount point, so we need to release the
			 * current dentry and execute the lookup to return a new
			 * one with ->d_name pointing to the
			 * <mount point>/subvol_name.
			 */
			dput(dentry);
			if (IS_ERR(parent)) {
				err = PTR_ERR(parent);
				goto out_drop_write;
			}
			dir = d_inode(parent);

			/*
			 * If v2 was used with SPEC_BY_ID, a new parent was
			 * allocated since the subvolume can be outside of the
			 * current mount point. Later on we need to release this
			 * new parent dentry.
			 */
			destroy_parent = true;

			subvol_name_ptr = btrfs_get_subvol_name_from_objectid(
						fs_info, vol_args2->subvolid);
			if (IS_ERR(subvol_name_ptr)) {
				err = PTR_ERR(subvol_name_ptr);
				goto free_parent;
			}
			/* subvol_name_ptr is already NULL termined */
			subvol_name = (char *)kbasename(subvol_name_ptr);
		}
	} else {
		vol_args = memdup_user(arg, sizeof(*vol_args));
		if (IS_ERR(vol_args))
			return PTR_ERR(vol_args);

		vol_args->name[BTRFS_PATH_NAME_MAX] = 0;
		subvol_name = vol_args->name;

		err = mnt_want_write_file(file);
		if (err)
			goto out;
2949 2950
	}

2951
	subvol_namelen = strlen(subvol_name);
2952

2953 2954 2955 2956 2957 2958 2959 2960 2961 2962
	if (strchr(subvol_name, '/') ||
	    strncmp(subvol_name, "..", subvol_namelen) == 0) {
		err = -EINVAL;
		goto free_subvol_name;
	}

	if (!S_ISDIR(dir->i_mode)) {
		err = -ENOTDIR;
		goto free_subvol_name;
	}
2963

2964 2965
	err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (err == -EINTR)
2966 2967
		goto free_subvol_name;
	dentry = lookup_one_len(subvol_name, parent, subvol_namelen);
2968 2969 2970 2971 2972
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock_dir;
	}

2973
	if (d_really_is_negative(dentry)) {
2974 2975 2976 2977
		err = -ENOENT;
		goto out_dput;
	}

2978
	inode = d_inode(dentry);
2979
	dest = BTRFS_I(inode)->root;
2980
	if (!capable(CAP_SYS_ADMIN)) {
2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994
		/*
		 * Regular user.  Only allow this with a special mount
		 * option, when the user has write+exec access to the
		 * subvol root, and when rmdir(2) would have been
		 * allowed.
		 *
		 * Note that this is _not_ check that the subvol is
		 * empty or doesn't contain data that we wouldn't
		 * otherwise be able to delete.
		 *
		 * Users who want to delete empty subvols should try
		 * rmdir(2).
		 */
		err = -EPERM;
2995
		if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013
			goto out_dput;

		/*
		 * Do not allow deletion if the parent dir is the same
		 * as the dir to be deleted.  That means the ioctl
		 * must be called on the dentry referencing the root
		 * of the subvol, not a random directory contained
		 * within it.
		 */
		err = -EINVAL;
		if (root == dest)
			goto out_dput;

		err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
		if (err)
			goto out_dput;
	}

3014 3015 3016 3017 3018
	/* check if subvolume may be deleted by a user */
	err = btrfs_may_delete(dir, dentry, 1);
	if (err)
		goto out_dput;

3019
	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
3020 3021 3022 3023
		err = -EINVAL;
		goto out_dput;
	}

A
Al Viro 已提交
3024
	inode_lock(inode);
3025
	err = btrfs_delete_subvolume(dir, dentry);
A
Al Viro 已提交
3026
	inode_unlock(inode);
3027 3028
	if (!err) {
		fsnotify_rmdir(dir, dentry);
3029
		d_delete(dentry);
3030
	}
3031

3032 3033 3034
out_dput:
	dput(dentry);
out_unlock_dir:
A
Al Viro 已提交
3035
	inode_unlock(dir);
3036 3037 3038 3039 3040
free_subvol_name:
	kfree(subvol_name_ptr);
free_parent:
	if (destroy_parent)
		dput(parent);
3041
out_drop_write:
A
Al Viro 已提交
3042
	mnt_drop_write_file(file);
3043
out:
3044
	kfree(vol_args2);
3045 3046 3047 3048
	kfree(vol_args);
	return err;
}

C
Chris Mason 已提交
3049
/*
 * Defrag ioctl.
 *
 * On a directory the root the directory belongs to is defragmented
 * (CAP_SYS_ADMIN only).  On a regular file the file is defragmented, using
 * the struct btrfs_ioctl_defrag_range_args at @argp when given, or the whole
 * file with default settings when @argp is NULL.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_defrag_range_args *range = NULL;
	int ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto drop_write;
	}

	if (S_ISDIR(inode->i_mode)) {
		if (!capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto drop_write;
		}
		ret = btrfs_defrag_root(root);
	} else if (S_ISREG(inode->i_mode)) {
		/*
		 * The file descriptor is deliberately not checked for write
		 * access: that keeps running executables from being
		 * defragmented while still allowing defrag on files that are
		 * open read-only.
		 */
		if (!capable(CAP_SYS_ADMIN) &&
		    inode_permission(inode, MAY_WRITE)) {
			ret = -EPERM;
			goto drop_write;
		}

		range = kzalloc(sizeof(*range), GFP_KERNEL);
		if (!range) {
			ret = -ENOMEM;
			goto drop_write;
		}

		if (!argp) {
			/* the rest are all set to zero by kzalloc */
			range->len = (u64)-1;
		} else {
			if (copy_from_user(range, argp, sizeof(*range))) {
				ret = -EFAULT;
				goto drop_write;
			}
			/* compression requires us to start the IO */
			if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
				range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
				range->extent_thresh = (u32)-1;
			}
		}

		ret = btrfs_defrag_file(inode, file, range,
					BTRFS_OLDEST_GENERATION, 0);
		/* A positive result is not an error, report plain success. */
		if (ret > 0)
			ret = 0;
	} else {
		ret = -EINVAL;
	}

drop_write:
	/* range is NULL unless the regular file branch allocated it. */
	kfree(range);
	mnt_drop_write_file(file);
	return ret;
}

3121
/*
 * Add a device to the filesystem.
 *
 * @arg points to a user space struct btrfs_ioctl_vol_args; its name field is
 * handed to btrfs_init_new_device().  The BTRFS_FS_EXCL_OP bit is held while
 * the device is being added.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
 * if the exclusive operation bit was already set, the memdup_user() error if
 * the arguments could not be copied in, and otherwise the result of
 * btrfs_init_new_device().
 */
static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
{
	struct btrfs_ioctl_vol_args *args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
	} else {
		/* Force NUL termination of the user supplied name. */
		args->name[BTRFS_PATH_NAME_MAX] = '\0';
		ret = btrfs_init_new_device(fs_info, args->name);
		if (ret == 0)
			btrfs_info(fs_info, "disk added %s", args->name);
		kfree(args);
	}

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	return ret;
}

3150
/*
 * Remove a device from the filesystem (v2 argument layout).
 *
 * @arg points to a user space struct btrfs_ioctl_vol_args_v2.  The device is
 * selected by devid when BTRFS_DEVICE_SPEC_BY_ID is set in flags, otherwise
 * by name.  Any flag outside BTRFS_DEVICE_REMOVE_ARGS_MASK is rejected with
 * -EOPNOTSUPP.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, the mnt_want_write_file() or
 * memdup_user() error, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS if the exclusive
 * operation bit was already set, and otherwise the result of
 * btrfs_rm_device().
 */
static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_ioctl_vol_args_v2 *args;
	bool by_id;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
		goto drop_write;
	}

	if (args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) {
		ret = -EOPNOTSUPP;
		goto free_args;
	}

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto free_args;
	}

	by_id = !!(args->flags & BTRFS_DEVICE_SPEC_BY_ID);
	if (by_id) {
		ret = btrfs_rm_device(fs_info, NULL, args->devid);
	} else {
		/* Force NUL termination of the user supplied name. */
		args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
		ret = btrfs_rm_device(fs_info, args->name, 0);
	}
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	/* Only log on success, after the exclusive op bit has been dropped. */
	if (ret == 0 && by_id)
		btrfs_info(fs_info, "device deleted: id %llu",
				args->devid);
	else if (ret == 0)
		btrfs_info(fs_info, "device deleted: %s",
				args->name);

free_args:
	kfree(args);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

3203
/*
 * Remove a device, identified by name, from the filesystem.
 *
 * @arg points to a user space struct btrfs_ioctl_vol_args.  Write access to
 * the mount and the BTRFS_FS_EXCL_OP bit are both held around the call to
 * btrfs_rm_device().
 *
 * Returns -EPERM without CAP_SYS_ADMIN, the mnt_want_write_file() or
 * memdup_user() error, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS if the exclusive
 * operation bit was already set, and otherwise the result of
 * btrfs_rm_device().
 */
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_ioctl_vol_args *args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto drop_write;
	}

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
	} else {
		/* Force NUL termination of the user supplied name. */
		args->name[BTRFS_PATH_NAME_MAX] = '\0';
		ret = btrfs_rm_device(fs_info, args->name, 0);
		if (ret == 0)
			btrfs_info(fs_info, "disk deleted %s", args->name);
		kfree(args);
	}

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
drop_write:
	mnt_drop_write_file(file);

	return ret;
}

3242 3243
/*
 * Report general filesystem information to user space.
 *
 * @arg points to a user space struct btrfs_ioctl_fs_info_args.  Only its
 * flags field is read as input; the structure is then zeroed, filled in and
 * copied back.  Checksum info, generation and metadata UUID are filled in
 * only when the matching BTRFS_FS_INFO_FLAG_* bit was requested, and the same
 * bit is set in the returned flags.
 *
 * Returns 0 on success, the memdup_user() error, or -EFAULT if the result
 * cannot be copied back.
 */
static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
				void __user *arg)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_ioctl_fs_info_args *info;
	struct btrfs_device *dev;
	u64 requested;
	int ret = 0;

	info = memdup_user(arg, sizeof(*info));
	if (IS_ERR(info))
		return PTR_ERR(info);

	/* Remember what was asked for, then start from a clean structure. */
	requested = info->flags;
	memset(info, 0, sizeof(*info));

	rcu_read_lock();
	info->num_devices = fs_devices->num_devices;

	/* max_id is the largest devid on the device list. */
	list_for_each_entry_rcu(dev, &fs_devices->devices, dev_list) {
		if (info->max_id < dev->devid)
			info->max_id = dev->devid;
	}
	rcu_read_unlock();

	memcpy(&info->fsid, fs_devices->fsid, sizeof(info->fsid));
	info->nodesize = fs_info->nodesize;
	info->sectorsize = fs_info->sectorsize;
	info->clone_alignment = fs_info->sectorsize;

	if (requested & BTRFS_FS_INFO_FLAG_CSUM_INFO) {
		info->csum_type = btrfs_super_csum_type(fs_info->super_copy);
		info->csum_size = btrfs_super_csum_size(fs_info->super_copy);
		info->flags |= BTRFS_FS_INFO_FLAG_CSUM_INFO;
	}

	if (requested & BTRFS_FS_INFO_FLAG_GENERATION) {
		info->generation = fs_info->generation;
		info->flags |= BTRFS_FS_INFO_FLAG_GENERATION;
	}

	if (requested & BTRFS_FS_INFO_FLAG_METADATA_UUID) {
		memcpy(&info->metadata_uuid, fs_devices->metadata_uuid,
		       sizeof(info->metadata_uuid));
		info->flags |= BTRFS_FS_INFO_FLAG_METADATA_UUID;
	}

	if (copy_to_user(arg, info, sizeof(*info)))
		ret = -EFAULT;

	kfree(info);
	return ret;
}

3296 3297
/*
 * Report information about one device to user space.
 *
 * @arg points to a user space struct btrfs_ioctl_dev_info_args.  The device
 * is looked up by devid, and additionally by uuid unless the supplied uuid is
 * empty.  On a match the structure is filled with the device's id, byte
 * counters, uuid and path and copied back.
 *
 * Returns 0 on success, the memdup_user() error, -ENODEV if no device
 * matches, or -EFAULT if the result cannot be copied back.
 */
static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
				 void __user *arg)
{
	struct btrfs_ioctl_dev_info_args *di;
	struct btrfs_device *dev;
	char *uuid = NULL;
	int ret = 0;

	di = memdup_user(arg, sizeof(*di));
	if (IS_ERR(di))
		return PTR_ERR(di);

	/* An empty uuid means "look up by devid only". */
	if (!btrfs_is_empty_uuid(di->uuid))
		uuid = di->uuid;

	rcu_read_lock();
	dev = btrfs_find_device(fs_info->fs_devices, di->devid, uuid,
				NULL, true);
	if (dev) {
		di->devid = dev->devid;
		di->bytes_used = btrfs_device_get_bytes_used(dev);
		di->total_bytes = btrfs_device_get_total_bytes(dev);
		memcpy(di->uuid, dev->uuid, sizeof(di->uuid));
		if (dev->name) {
			strncpy(di->path, rcu_str_deref(dev->name),
					sizeof(di->path) - 1);
			/* strncpy() does not terminate on truncation. */
			di->path[sizeof(di->path) - 1] = 0;
		} else {
			di->path[0] = '\0';
		}
	} else {
		ret = -ENODEV;
	}
	rcu_read_unlock();

	if (ret == 0 && copy_to_user(arg, di, sizeof(*di)))
		ret = -EFAULT;

	kfree(di);
	return ret;
}

3341 3342
/*
 * Change the default subvolume.
 *
 * @argp points to a user space u64 holding the objectid of the new default
 * root; 0 selects BTRFS_FS_TREE_OBJECTID.  The "default" dir item in the tree
 * root is rewritten to point at that root's key and the DEFAULT_SUBVOL
 * incompat flag is set.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EFAULT if the objectid
 * cannot be read, -ENOENT if the root is not an fs tree or the "default" dir
 * item is missing, -ENOMEM if no path can be allocated, or the error from
 * mnt_want_write_file(), btrfs_get_fs_root() or btrfs_start_transaction().
 */
static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root *new_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_dir_item *di;
	struct btrfs_path *path = NULL;
	struct btrfs_disk_key disk_key;
	u64 objectid = 0;
	u64 dir_id;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (copy_from_user(&objectid, argp, sizeof(objectid))) {
		ret = -EFAULT;
		goto drop_write;
	}

	if (objectid == 0)
		objectid = BTRFS_FS_TREE_OBJECTID;

	new_root = btrfs_get_fs_root(fs_info, objectid, true);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_write;
	}
	if (!is_fstree(new_root->root_key.objectid)) {
		ret = -ENOENT;
		goto put_root;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto put_root;
	}
	path->leave_spinning = 1;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto put_root;
	}

	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
				   dir_id, "default", 7, 1);
	if (IS_ERR_OR_NULL(di)) {
		btrfs_release_path(path);
		btrfs_end_transaction(trans);
		btrfs_err(fs_info,
			  "Umm, you don't have the default diritem, this isn't going to work");
		ret = -ENOENT;
		goto put_root;
	}

	/* Point the "default" dir item at the new root's key. */
	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_release_path(path);

	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
	btrfs_end_transaction(trans);

	/* path is still NULL if we get here before it was allocated. */
put_root:
	btrfs_put_root(new_root);
	btrfs_free_path(path);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

3420 3421
static void get_block_group_info(struct list_head *groups_list,
				 struct btrfs_ioctl_space_info *space)
3422
{
3423
	struct btrfs_block_group *block_group;
3424 3425 3426 3427 3428 3429

	space->total_bytes = 0;
	space->used_bytes = 0;
	space->flags = 0;
	list_for_each_entry(block_group, groups_list, list) {
		space->flags = block_group->flags;
3430
		space->total_bytes += block_group->length;
3431
		space->used_bytes += block_group->used;
3432 3433 3434
	}
}

3435 3436
/*
 * Report per-profile space usage to user space.
 *
 * @arg points to a user space struct btrfs_ioctl_space_args, immediately
 * followed by room for space_slots entries of struct btrfs_ioctl_space_info.
 *
 * If space_slots is 0 only total_spaces is filled in, with the number of
 * entries currently available (one per non-empty block group list of each
 * known space info type, plus one for the global block reserve).  Otherwise
 * up to space_slots entries are written after the header and total_spaces is
 * set to the number actually filled.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, or -ENOMEM if the
 * result buffer would exceed a page or cannot be allocated.
 */
static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
				   void __user *arg)
{
	struct btrfs_ioctl_space_args space_args;
	struct btrfs_ioctl_space_info space;
	struct btrfs_ioctl_space_info *dest;
	struct btrfs_ioctl_space_info *dest_orig;
	struct btrfs_ioctl_space_info __user *user_dest;
	struct btrfs_space_info *info;
	static const u64 types[] = {
		BTRFS_BLOCK_GROUP_DATA,
		BTRFS_BLOCK_GROUP_SYSTEM,
		BTRFS_BLOCK_GROUP_METADATA,
		BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
	};
	/* Derived from the table so the two cannot drift apart. */
	const int num_types = ARRAY_SIZE(types);
	int alloc_size;
	int ret = 0;
	u64 slot_count = 0;
	int i, c;

	if (copy_from_user(&space_args,
			   (struct btrfs_ioctl_space_args __user *)arg,
			   sizeof(space_args)))
		return -EFAULT;

	/* First pass: count how many entries we could report. */
	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;

		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c]))
				slot_count++;
		}
		up_read(&info->groups_sem);
	}

	/*
	 * Global block reserve, exported as a space_info
	 */
	slot_count++;

	/* space_slots == 0 means they are asking for a count */
	if (space_args.space_slots == 0) {
		space_args.total_spaces = slot_count;
		goto out;
	}

	slot_count = min_t(u64, space_args.space_slots, slot_count);

	alloc_size = sizeof(*dest) * slot_count;

	/* we generally have at most 6 or so space infos, one for each raid
	 * level.  So, a whole page should be more than enough for everyone
	 */
	if (alloc_size > PAGE_SIZE)
		return -ENOMEM;

	space_args.total_spaces = 0;
	/*
	 * Zero the buffer: all alloc_size bytes are copied to user space
	 * below, but groups_sem is dropped between the counting pass and the
	 * fill pass, so fewer slots may end up filled than were counted.
	 * Unfilled slots must read as zeroes, not as stale heap contents.
	 */
	dest = kzalloc(alloc_size, GFP_KERNEL);
	if (!dest)
		return -ENOMEM;
	dest_orig = dest;

	/* now we have a buffer to copy into */
	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		if (!slot_count)
			break;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;
		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c])) {
				get_block_group_info(&info->block_groups[c],
						     &space);
				memcpy(dest, &space, sizeof(space));
				dest++;
				space_args.total_spaces++;
				slot_count--;
			}
			if (!slot_count)
				break;
		}
		up_read(&info->groups_sem);
	}

	/*
	 * Add global block reserve
	 */
	if (slot_count) {
		struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;

		spin_lock(&block_rsv->lock);
		space.total_bytes = block_rsv->size;
		space.used_bytes = block_rsv->size - block_rsv->reserved;
		spin_unlock(&block_rsv->lock);
		space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
		memcpy(dest, &space, sizeof(space));
		space_args.total_spaces++;
	}

	/* The entries live directly behind the header in the user buffer. */
	user_dest = (struct btrfs_ioctl_space_info __user *)
		(arg + sizeof(struct btrfs_ioctl_space_args));

	if (copy_to_user(user_dest, dest_orig, alloc_size))
		ret = -EFAULT;

	kfree(dest_orig);
out:
	if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
		ret = -EFAULT;

	return ret;
}

3578 3579
/*
 * Kick off an asynchronous commit of the running transaction.
 *
 * If @argp is non-NULL the id of the transaction being committed is stored
 * there.  When there is no running transaction (-ENOENT from
 * btrfs_attach_transaction_barrier()) nothing is committed and the id of the
 * last committed transaction is reported instead.
 *
 * Returns 0 on success, -EFAULT if the id cannot be copied out, or the error
 * from attaching to or committing the transaction.
 */
static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
					    void __user *argp)
{
	struct btrfs_trans_handle *trans;
	u64 transid;
	int ret;

	trans = btrfs_attach_transaction_barrier(root);
	if (!IS_ERR(trans)) {
		transid = trans->transid;
		ret = btrfs_commit_transaction_async(trans, 0);
		if (ret) {
			btrfs_end_transaction(trans);
			return ret;
		}
	} else if (PTR_ERR(trans) == -ENOENT) {
		/* No running transaction, don't bother */
		transid = root->fs_info->last_trans_committed;
	} else {
		return PTR_ERR(trans);
	}

	if (argp && copy_to_user(argp, &transid, sizeof(transid)))
		return -EFAULT;
	return 0;
}

3607
static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
3608
					   void __user *argp)
3609 3610 3611 3612 3613 3614 3615 3616 3617
{
	u64 transid;

	if (argp) {
		if (copy_from_user(&transid, argp, sizeof(transid)))
			return -EFAULT;
	} else {
		transid = 0;  /* current trans */
	}
3618
	return btrfs_wait_for_commit(fs_info, transid);
3619 3620
}

M
Miao Xie 已提交
3621
/*
 * Run a scrub on one device.
 *
 * @arg points to a user space struct btrfs_ioctl_scrub_args.  Write access to
 * the mount is taken unless BTRFS_SCRUB_READONLY is set in flags.  The
 * structure, including the progress counters, is copied back to user space
 * whatever btrfs_scrub_dev() returned.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, the memdup_user() or
 * mnt_want_write_file() error, -EFAULT if the structure cannot be copied
 * back, and otherwise the result of btrfs_scrub_dev().
 */
static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_ioctl_scrub_args *sa;
	bool need_write;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	need_write = !(sa->flags & BTRFS_SCRUB_READONLY);
	if (need_write) {
		ret = mnt_want_write_file(file);
		if (ret)
			goto free_sa;
	}

	ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
			      &sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
			      0);

	/*
	 * The scrub args go back to user space even when btrfs_scrub_dev()
	 * failed, so that the caller can see how far the scrub got.  A
	 * canceled scrub, for instance, returns -ECANCELED, and user space can
	 * later resume from the reported progress (btrfs-progs does this).
	 * If the copy itself fails, report -EFAULT: the structure was either
	 * not copied or only partially copied and cannot be relied upon.
	 */
	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	if (need_write)
		mnt_drop_write_file(file);
free_sa:
	kfree(sa);
	return ret;
}

3666
static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
J
Jan Schmidt 已提交
3667 3668 3669 3670
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

3671
	return btrfs_scrub_cancel(fs_info);
J
Jan Schmidt 已提交
3672 3673
}

3674
static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
J
Jan Schmidt 已提交
3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686
				       void __user *arg)
{
	struct btrfs_ioctl_scrub_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

3687
	ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
J
Jan Schmidt 已提交
3688

3689
	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
J
Jan Schmidt 已提交
3690 3691 3692 3693 3694 3695
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

3696
static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
3697
				      void __user *arg)
3698 3699 3700 3701 3702 3703 3704 3705
{
	struct btrfs_ioctl_get_dev_stats *sa;
	int ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

3706 3707 3708 3709 3710
	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
		kfree(sa);
		return -EPERM;
	}

3711
	ret = btrfs_get_dev_stats(fs_info, sa);
3712

3713
	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
3714 3715 3716 3717 3718 3719
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

3720 3721
/*
 * Start, query or cancel a device replace operation.
 *
 * @arg points to a user space struct btrfs_ioctl_dev_replace_args; cmd
 * selects the operation:
 *   START  - refused with -EROFS on a read-only superblock; otherwise runs
 *            btrfs_dev_replace_by_ioctl() with BTRFS_FS_EXCL_OP held, or
 *            yields BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS if the bit was
 *            already set
 *   STATUS - fills the structure via btrfs_dev_replace_status()
 *   CANCEL - stores the result of btrfs_dev_replace_cancel() in result
 * Any other cmd yields -EINVAL.
 *
 * The structure is copied back only when the outcome is 0 or -ECANCELED.
 * Returns -EPERM without CAP_SYS_ADMIN, the memdup_user() error, or -EFAULT
 * if that copy fails.
 */
static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
				    void __user *arg)
{
	struct btrfs_ioctl_dev_replace_args *p;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	p = memdup_user(arg, sizeof(*p));
	if (IS_ERR(p))
		return PTR_ERR(p);

	switch (p->cmd) {
	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
		if (sb_rdonly(fs_info->sb)) {
			/* -EROFS never satisfies the copy-back test below. */
			ret = -EROFS;
			break;
		}
		if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
			ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
			break;
		}
		ret = btrfs_dev_replace_by_ioctl(fs_info, p);
		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
		btrfs_dev_replace_status(fs_info, p);
		ret = 0;
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
		p->result = btrfs_dev_replace_cancel(fs_info);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (ret == 0 || ret == -ECANCELED) {
		if (copy_to_user(arg, p, sizeof(*p)))
			ret = -EFAULT;
	}

	kfree(p);
	return ret;
}

3766 3767 3768 3769
/*
 * Resolve an inode number to its path(s) within @root.
 *
 * @arg points to a user space struct btrfs_ioctl_ino_path_args.  The result
 * container is capped at 4096 bytes and written to the user buffer named by
 * fspath.  Each val[] entry is converted from a kernel address into an offset
 * relative to the start of val[] before being copied out.
 *
 * Returns 0 on success, -EPERM without CAP_DAC_READ_SEARCH, -ENOMEM if no
 * path can be allocated, -EFAULT if the result cannot be copied out, or the
 * error from memdup_user(), init_ipath() or paths_from_inode().
 */
static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_ino_path_args *ipa = NULL;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_path *path;
	u64 base;
	int size;
	int ret = 0;
	int i;

	if (!capable(CAP_DAC_READ_SEARCH))
		return -EPERM;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ipa = memdup_user(arg, sizeof(*ipa));
	if (IS_ERR(ipa)) {
		ret = PTR_ERR(ipa);
		ipa = NULL;
		goto out;
	}

	size = min_t(u32, ipa->size, 4096);
	ipath = init_ipath(size, root, path);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto out;
	}

	ret = paths_from_inode(ipa->inum, ipath);
	if (ret < 0)
		goto out;

	/* Rewrite absolute kernel pointers as offsets from val[]. */
	base = (u64)(unsigned long)ipath->fspath->val;
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		ipath->fspath->val[i] -= base;

	ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
			   ipath->fspath, size) ? -EFAULT : 0;

out:
	btrfs_free_path(path);
	free_ipath(ipath);
	kfree(ipa);

	return ret;
}

/*
 * Append one (inum, offset, root) triple to the data container in @ctx.
 *
 * When fewer than three u64 slots worth of bytes are left, nothing is
 * stored; the shortfall is added to bytes_missing, bytes_left is zeroed and
 * elem_missed grows by 3.  Always returns 0.
 */
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
{
	struct btrfs_data_container *inodes = ctx;
	const size_t c = 3 * sizeof(u64);

	if (inodes->bytes_left < c) {
		/* Out of room: only account for what did not fit. */
		inodes->bytes_missing += c - inodes->bytes_left;
		inodes->bytes_left = 0;
		inodes->elem_missed += 3;
		return 0;
	}

	inodes->bytes_left -= c;
	inodes->val[inodes->elem_cnt] = inum;
	inodes->val[inodes->elem_cnt + 1] = offset;
	inodes->val[inodes->elem_cnt + 2] = root;
	inodes->elem_cnt += 3;

	return 0;
}

3845
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
3846
					void __user *arg, int version)
3847 3848 3849 3850 3851 3852
{
	int ret = 0;
	int size;
	struct btrfs_ioctl_logical_ino_args *loi;
	struct btrfs_data_container *inodes = NULL;
	struct btrfs_path *path = NULL;
3853
	bool ignore_offset;
3854 3855 3856 3857 3858

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	loi = memdup_user(arg, sizeof(*loi));
3859 3860
	if (IS_ERR(loi))
		return PTR_ERR(loi);
3861

3862 3863
	if (version == 1) {
		ignore_offset = false;
3864
		size = min_t(u32, loi->size, SZ_64K);
3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876
	} else {
		/* All reserved bits must be 0 for now */
		if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
			ret = -EINVAL;
			goto out_loi;
		}
		/* Only accept flags we have defined so far */
		if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
			ret = -EINVAL;
			goto out_loi;
		}
		ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
3877
		size = min_t(u32, loi->size, SZ_16M);
3878 3879
	}

3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892
	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	inodes = init_data_container(size);
	if (IS_ERR(inodes)) {
		ret = PTR_ERR(inodes);
		inodes = NULL;
		goto out;
	}

3893
	ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
3894
					  build_ino_list, inodes, ignore_offset);
L
Liu Bo 已提交
3895
	if (ret == -EINVAL)
3896 3897 3898 3899
		ret = -ENOENT;
	if (ret < 0)
		goto out;

3900 3901
	ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
			   size);
3902 3903 3904 3905 3906
	if (ret)
		ret = -EFAULT;

out:
	btrfs_free_path(path);
3907
	kvfree(inodes);
3908
out_loi:
3909 3910 3911 3912 3913
	kfree(loi);

	return ret;
}

3914
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
3915 3916 3917 3918 3919 3920
			       struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	bargs->flags = bctl->flags;

3921
	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
3922 3923 3924
		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
	if (atomic_read(&fs_info->balance_pause_req))
		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
3925 3926
	if (atomic_read(&fs_info->balance_cancel_req))
		bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
3927

3928 3929 3930
	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3931

3932 3933 3934
	spin_lock(&fs_info->balance_lock);
	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
	spin_unlock(&fs_info->balance_lock);
3935 3936
}

3937
/*
 * Start a new balance or resume a paused one.
 *
 * @arg, when non-NULL, points to a user space struct btrfs_ioctl_balance_args
 * carrying the filters and flags; with BTRFS_BALANCE_RESUME set the existing
 * balance control is resumed instead of a new one being built.  A NULL @arg
 * balances everything with no filters.
 *
 * The function is entered with neither BTRFS_FS_EXCL_OP nor balance_mutex
 * held.  It reaches the "locked" label with balance_mutex held and the
 * exclusive op bit set, either by us (need_unlock == true) or by a paused
 * balance.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EINPROGRESS if a balance is running
 * or already exists, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS if another
 * exclusive operation holds the bit, -ENOTCONN when asked to resume with no
 * balance present, -ENOMEM, -EINVAL for unknown flags, -EFAULT on a failed
 * copy back, or the result of mnt_want_write_file(), memdup_user() or
 * btrfs_balance().
 */
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs = NULL;
	struct btrfs_balance_control *bctl;
	bool need_unlock; /* for mut. excl. ops lock */
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

again:
	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		mutex_lock(&fs_info->balance_mutex);
		need_unlock = true;
		goto locked;
	}

	/*
	 * mut. excl. ops lock is locked.  Three possibilities:
	 *   (1) some other op is running
	 *   (2) balance is running
	 *   (3) balance is paused -- special case (think resume)
	 */
	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		/* this is (1) */
		mutex_unlock(&fs_info->balance_mutex);
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out;
	}
	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		/* this is (2) */
		mutex_unlock(&fs_info->balance_mutex);
		ret = -EINPROGRESS;
		goto out;
	}

	/*
	 * Either (2) or (3), and not running right now.  Drop the mutex to
	 * let other waiters continue, then re-examine the state.
	 */
	mutex_unlock(&fs_info->balance_mutex);
	mutex_lock(&fs_info->balance_mutex);

	if (fs_info->balance_ctl &&
	    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		/* this is (3) */
		need_unlock = false;
		goto locked;
	}

	mutex_unlock(&fs_info->balance_mutex);
	goto again;

locked:
	BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			/* bargs is an error pointer: skip its kfree(). */
			goto out_unlock;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			bctl = fs_info->balance_ctl;
			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	}

	/* Not a resume: there must not be a balance control yet. */
	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	if (arg) {
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

	if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
		ret = -EINVAL;
		goto out_bctl;
	}

do_balance:
	/*
	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
	 * btrfs_balance.  bctl is freed in reset_balance_state, or, if
	 * restriper was paused all the way until unmount, in free_fs_info.
	 * The flag should be cleared after reset_balance_state.
	 */
	need_unlock = false;

	ret = btrfs_balance(fs_info, bctl, bargs);
	/* Not ours any more; make the kfree() below a no-op. */
	bctl = NULL;

	if (arg && (ret == 0 || ret == -ECANCELED)) {
		if (copy_to_user(arg, bargs, sizeof(*bargs)))
			ret = -EFAULT;
	}

out_bctl:
	kfree(bctl);
out_bargs:
	kfree(bargs);
out_unlock:
	mutex_unlock(&fs_info->balance_mutex);
	if (need_unlock)
		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out:
	mnt_drop_write_file(file);
	return ret;
}

4083
static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd)
4084 4085 4086 4087 4088 4089
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case BTRFS_BALANCE_CTL_PAUSE:
4090
		return btrfs_pause_balance(fs_info);
4091
	case BTRFS_BALANCE_CTL_CANCEL:
4092
		return btrfs_cancel_balance(fs_info);
4093 4094 4095 4096 4097
	}

	return -EINVAL;
}

4098
static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112
					 void __user *arg)
{
	struct btrfs_ioctl_balance_args *bargs;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		ret = -ENOTCONN;
		goto out;
	}

4113
	bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
4114 4115 4116 4117 4118
	if (!bargs) {
		ret = -ENOMEM;
		goto out;
	}

4119
	btrfs_update_ioctl_balance_args(fs_info, bargs);
4120 4121 4122 4123 4124 4125 4126 4127 4128 4129

	if (copy_to_user(arg, bargs, sizeof(*bargs)))
		ret = -EFAULT;

	kfree(bargs);
out:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}

4130
/*
 * Enable or disable quotas.
 *
 * @arg points to a user space struct btrfs_ioctl_quota_ctl_args; cmd selects
 * BTRFS_QUOTA_CTL_ENABLE or BTRFS_QUOTA_CTL_DISABLE.  The operation runs with
 * write access to the mount and with subvol_sem held for writing.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, the mnt_want_write_file() or
 * memdup_user() error, -EINVAL for an unknown cmd, and otherwise the result
 * of btrfs_quota_enable() or btrfs_quota_disable().
 */
static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_ioctl_quota_ctl_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	down_write(&fs_info->subvol_sem);
	switch (sa->cmd) {
	case BTRFS_QUOTA_CTL_ENABLE:
		ret = btrfs_quota_enable(fs_info);
		break;
	case BTRFS_QUOTA_CTL_DISABLE:
		ret = btrfs_quota_disable(fs_info);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	kfree(sa);
	up_write(&fs_info->subvol_sem);

drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4171
/*
 * Add or remove a relation between two qgroups.
 *
 * @arg points to a user space struct btrfs_ioctl_qgroup_assign_args.  A
 * non-zero assign adds the src -> dst relation, zero removes it.  The qgroup
 * status is refreshed with btrfs_run_qgroups() afterwards whatever the
 * outcome; a failure there is reported through btrfs_handle_fs_error() and
 * does not change the return value.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, the error from mnt_want_write_file(),
 * memdup_user() or btrfs_join_transaction(), the result of the add/del call,
 * or, if that succeeded, the error from btrfs_end_transaction().
 */
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_assign_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto free_sa;
	}

	if (sa->assign)
		ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
	else
		ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);

	/* update qgroup status and info */
	err = btrfs_run_qgroups(trans);
	if (err < 0)
		btrfs_handle_fs_error(fs_info, err,
				      "failed to update qgroup status and info");

	/* An error from the relation update wins over one from the end. */
	err = btrfs_end_transaction(trans);
	if (!ret && err)
		ret = err;

free_sa:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4222
/*
 * BTRFS_IOC_QGROUP_CREATE handler: create or remove the qgroup named by
 * the user arguments, depending on their 'create' field.
 *
 * @file: open file on the filesystem being modified
 * @arg:  user pointer to a struct btrfs_ioctl_qgroup_create_args
 *
 * The caller needs CAP_SYS_ADMIN and the mount must be writable.
 *
 * Return: 0 on success, -EINVAL for a zero qgroupid, or another negative
 * errno.
 */
static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_create_args *args;
	struct btrfs_trans_handle *trans;
	int ret;
	int end_err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
		goto drop_write;
	}

	/* A qgroupid of zero is rejected before any transaction is joined. */
	if (!args->qgroupid) {
		ret = -EINVAL;
		goto free_args;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto free_args;
	}

	if (args->create)
		ret = btrfs_create_qgroup(trans, args->qgroupid);
	else
		ret = btrfs_remove_qgroup(trans, args->qgroupid);

	/* Keep the first error: only report end-of-transaction failure if ok. */
	end_err = btrfs_end_transaction(trans);
	if (!ret && end_err)
		ret = end_err;

free_args:
	kfree(args);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4272
/*
 * BTRFS_IOC_QGROUP_LIMIT handler: apply the limits in the user arguments
 * to a qgroup.
 *
 * @file: open file on the filesystem being modified
 * @arg:  user pointer to a struct btrfs_ioctl_qgroup_limit_args
 *
 * When the supplied qgroupid is zero, the objectid of the root that the
 * file's inode belongs to is used instead.  The caller needs CAP_SYS_ADMIN
 * and the mount must be writable.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_limit_args *args;
	struct btrfs_trans_handle *trans;
	u64 target;
	int ret;
	int end_err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto free_args;
	}

	/* Zero means "take the current subvol as qgroup". */
	target = args->qgroupid ? args->qgroupid : root->root_key.objectid;

	ret = btrfs_limit_qgroup(trans, target, &args->lim);

	end_err = btrfs_end_transaction(trans);
	if (!ret && end_err)
		ret = end_err;

free_args:
	kfree(args);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

J
Jan Schmidt 已提交
4320 4321
/*
 * BTRFS_IOC_QUOTA_RESCAN handler: request a qgroup rescan via
 * btrfs_qgroup_rescan().
 *
 * @file: open file on the filesystem to rescan
 * @arg:  user pointer to a struct btrfs_ioctl_quota_rescan_args
 *
 * The caller needs CAP_SYS_ADMIN and the mount must be writable.  The
 * 'flags' field of the arguments must be zero.
 *
 * Return: the result of btrfs_qgroup_rescan(), -EINVAL for non-zero flags,
 * or another negative errno.
 */
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_quota_rescan_args *args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args)) {
		ret = PTR_ERR(args);
		goto drop_write;
	}

	if (args->flags)
		ret = -EINVAL;
	else
		ret = btrfs_qgroup_rescan(fs_info);

	kfree(args);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4354 4355
/*
 * BTRFS_IOC_QUOTA_RESCAN_STATUS handler: report whether a qgroup rescan is
 * flagged as running and, if so, the objectid of its progress key.
 *
 * @fs_info: filesystem to query
 * @arg:     user pointer receiving a struct btrfs_ioctl_quota_rescan_args
 *
 * The reply is zero-filled; 'flags' is set to 1 and 'progress' filled in
 * only while BTRFS_QGROUP_STATUS_FLAG_RESCAN is set in qgroup_flags.
 *
 * Return: 0 on success, -EPERM without CAP_SYS_ADMIN, -ENOMEM if the reply
 * cannot be allocated, -EFAULT if it cannot be copied out.
 */
static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
						void __user *arg)
{
	struct btrfs_ioctl_quota_rescan_args *status;
	long ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	status = kzalloc(sizeof(*status), GFP_KERNEL);
	if (!status)
		return -ENOMEM;

	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		status->flags = 1;
		status->progress = fs_info->qgroup_rescan_progress.objectid;
	}

	ret = copy_to_user(arg, status, sizeof(*status)) ? -EFAULT : 0;

	kfree(status);
	return ret;
}

4379 4380
/*
 * BTRFS_IOC_QUOTA_RESCAN_WAIT handler.
 *
 * @fs_info: filesystem whose rescan is waited for
 * @arg:     unused
 *
 * Return: -EPERM without CAP_SYS_ADMIN, otherwise the result of
 * btrfs_qgroup_wait_for_completion() called with its second argument true.
 */
static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
						void __user *arg)
{
	if (capable(CAP_SYS_ADMIN))
		return btrfs_qgroup_wait_for_completion(fs_info, true);

	return -EPERM;
}

4388 4389
static long _btrfs_ioctl_set_received_subvol(struct file *file,
					    struct btrfs_ioctl_received_subvol_args *sa)
4390
{
A
Al Viro 已提交
4391
	struct inode *inode = file_inode(file);
4392
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4393 4394 4395
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct btrfs_trans_handle *trans;
4396
	struct timespec64 ct = current_time(inode);
4397
	int ret = 0;
4398
	int received_uuid_changed;
4399

4400 4401 4402
	if (!inode_owner_or_capable(inode))
		return -EPERM;

4403 4404 4405 4406
	ret = mnt_want_write_file(file);
	if (ret < 0)
		return ret;

4407
	down_write(&fs_info->subvol_sem);
4408

4409
	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
4410 4411 4412 4413 4414 4415 4416 4417 4418
		ret = -EINVAL;
		goto out;
	}

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto out;
	}

4419 4420 4421 4422 4423
	/*
	 * 1 - root item
	 * 2 - uuid items (received uuid + subvol uuid)
	 */
	trans = btrfs_start_transaction(root, 3);
4424 4425 4426 4427 4428 4429 4430 4431 4432 4433
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	sa->rtransid = trans->transid;
	sa->rtime.sec = ct.tv_sec;
	sa->rtime.nsec = ct.tv_nsec;

4434 4435 4436
	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
				       BTRFS_UUID_SIZE);
	if (received_uuid_changed &&
4437
	    !btrfs_is_empty_uuid(root_item->received_uuid)) {
4438
		ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
4439 4440 4441 4442 4443 4444 4445 4446
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  root->root_key.objectid);
		if (ret && ret != -ENOENT) {
		        btrfs_abort_transaction(trans, ret);
		        btrfs_end_transaction(trans);
		        goto out;
		}
	}
4447 4448 4449
	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
	btrfs_set_root_stransid(root_item, sa->stransid);
	btrfs_set_root_rtransid(root_item, sa->rtransid);
4450 4451 4452 4453
	btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
	btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
4454

4455
	ret = btrfs_update_root(trans, fs_info->tree_root,
4456 4457
				&root->root_key, &root->root_item);
	if (ret < 0) {
4458
		btrfs_end_transaction(trans);
4459
		goto out;
4460 4461
	}
	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
4462
		ret = btrfs_uuid_tree_add(trans, sa->uuid,
4463 4464 4465
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  root->root_key.objectid);
		if (ret < 0 && ret != -EEXIST) {
4466
			btrfs_abort_transaction(trans, ret);
4467
			btrfs_end_transaction(trans);
4468
			goto out;
4469 4470
		}
	}
4471
	ret = btrfs_commit_transaction(trans);
4472
out:
4473
	up_write(&fs_info->subvol_sem);
4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486
	mnt_drop_write_file(file);
	return ret;
}

#ifdef CONFIG_64BIT
/*
 * BTRFS_IOC_SET_RECEIVED_SUBVOL_32 handler.
 *
 * @file: open file the request is issued on
 * @arg:  user pointer to a struct btrfs_ioctl_received_subvol_args_32
 *
 * Converts the 32-bit layout field by field into a regular
 * struct btrfs_ioctl_received_subvol_args, hands that to
 * _btrfs_ioctl_set_received_subvol() and, on success, converts the result
 * back and copies it to user space.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *compat_args = NULL;
	struct btrfs_ioctl_received_subvol_args *native_args = NULL;
	int ret = 0;

	compat_args = memdup_user(arg, sizeof(*compat_args));
	if (IS_ERR(compat_args))
		return PTR_ERR(compat_args);

	native_args = kmalloc(sizeof(*native_args), GFP_KERNEL);
	if (!native_args) {
		ret = -ENOMEM;
		goto out;
	}

	/* 32-bit layout -> native layout */
	memcpy(native_args->uuid, compat_args->uuid, BTRFS_UUID_SIZE);
	native_args->stransid = compat_args->stransid;
	native_args->rtransid = compat_args->rtransid;
	native_args->stime.sec = compat_args->stime.sec;
	native_args->stime.nsec = compat_args->stime.nsec;
	native_args->rtime.sec = compat_args->rtime.sec;
	native_args->rtime.nsec = compat_args->rtime.nsec;
	native_args->flags = compat_args->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, native_args);
	if (ret)
		goto out;

	/* native layout -> 32-bit layout, picking up the updated fields */
	memcpy(compat_args->uuid, native_args->uuid, BTRFS_UUID_SIZE);
	compat_args->stransid = native_args->stransid;
	compat_args->rtransid = native_args->rtransid;
	compat_args->stime.sec = native_args->stime.sec;
	compat_args->stime.nsec = native_args->stime.nsec;
	compat_args->rtime.sec = native_args->rtime.sec;
	compat_args->rtime.nsec = native_args->rtime.nsec;
	compat_args->flags = native_args->flags;

	if (copy_to_user(arg, compat_args, sizeof(*compat_args)))
		ret = -EFAULT;

out:
	kfree(compat_args);
	kfree(native_args);
	return ret;
}
#endif

/*
 * BTRFS_IOC_SET_RECEIVED_SUBVOL handler.
 *
 * @file: open file the request is issued on
 * @arg:  user pointer to a struct btrfs_ioctl_received_subvol_args
 *
 * Duplicates the arguments from user space, lets
 * _btrfs_ioctl_set_received_subvol() do the work and, on success, copies
 * the possibly updated structure back.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static long btrfs_ioctl_set_received_subvol(struct file *file,
					    void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args *args;
	int ret;

	args = memdup_user(arg, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	ret = _btrfs_ioctl_set_received_subvol(file, args);
	if (!ret && copy_to_user(arg, args, sizeof(*args)))
		ret = -EFAULT;

	kfree(args);
	return ret;
}

4553 4554
/*
 * FS_IOC_GETFSLABEL handler: copy the filesystem label to user space as a
 * NUL-terminated string.
 *
 * @fs_info: filesystem whose label is read
 * @arg:     user buffer receiving the label
 *
 * The label is snapshotted from the in-memory super block copy under
 * super_lock.  If the stored label fills all BTRFS_LABEL_SIZE bytes without
 * a terminator, a warning is logged and only the first
 * BTRFS_LABEL_SIZE - 1 bytes are returned.
 *
 * The terminating NUL is always copied out along with the label, so the
 * caller does not have to pre-zero its buffer to get a valid string.  At
 * most BTRFS_LABEL_SIZE bytes are written to @arg.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be written.
 */
static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
					void __user *arg)
{
	char label[BTRFS_LABEL_SIZE];
	size_t len;

	spin_lock(&fs_info->super_lock);
	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
	spin_unlock(&fs_info->super_lock);

	len = strnlen(label, BTRFS_LABEL_SIZE);

	if (len == BTRFS_LABEL_SIZE) {
		/* No terminator on disk: truncate by one byte to make room. */
		len--;
		btrfs_warn(fs_info,
			   "label is too long, return the first %zu bytes",
			   len);
		label[len] = '\0';
	}

	/* label[len] is '\0' here, so len + 1 bytes include the terminator. */
	if (copy_to_user(arg, label, len + 1))
		return -EFAULT;

	return 0;
}

4577 4578
/*
 * FS_IOC_SETFSLABEL handler: replace the label in the in-memory super
 * block copy and commit a transaction.
 *
 * @file: open file on the filesystem being relabelled
 * @arg:  user buffer of BTRFS_LABEL_SIZE bytes holding the new label
 *
 * The new label must contain a NUL within BTRFS_LABEL_SIZE bytes, i.e. be
 * at most BTRFS_LABEL_SIZE - 1 characters long.
 *
 * Return: 0 on success, -EPERM without CAP_SYS_ADMIN, -EFAULT if the
 * buffer cannot be read, -EINVAL for an over-long label, or another
 * negative errno.
 */
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_super_block *sb_copy = fs_info->super_copy;
	struct btrfs_trans_handle *trans;
	char new_label[BTRFS_LABEL_SIZE];
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(new_label, arg, sizeof(new_label)))
		return -EFAULT;

	/* Without a terminator in the buffer the strcpy() below is unsafe. */
	if (strnlen(new_label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
		btrfs_err(fs_info,
			  "unable to set label with more than %d bytes",
			  BTRFS_LABEL_SIZE - 1);
		return -EINVAL;
	}

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto drop_write;
	}

	spin_lock(&fs_info->super_lock);
	strcpy(sb_copy->label, new_label);
	spin_unlock(&fs_info->super_lock);

	ret = btrfs_commit_transaction(trans);

drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4620 4621 4622 4623 4624
/*
 * Expand to a brace-enclosed initializer that sets compat_flags,
 * compat_ro_flags and incompat_flags to the BTRFS_FEATURE_COMPAT_<suffix>,
 * BTRFS_FEATURE_COMPAT_RO_<suffix> and BTRFS_FEATURE_INCOMPAT_<suffix>
 * masks respectively.
 */
#define INIT_FEATURE_FLAGS(suffix) \
	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
	  .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
	  .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }

4625
/*
 * BTRFS_IOC_GET_SUPPORTED_FEATURES handler.
 *
 * @arg: user buffer receiving three struct btrfs_ioctl_feature_flags
 *
 * The three entries are built from the SUPP, SAFE_SET and SAFE_CLEAR
 * feature masks, in that order.
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be written.
 */
int btrfs_ioctl_get_supported_features(void __user *arg)
{
	static const struct btrfs_ioctl_feature_flags features[3] = {
		INIT_FEATURE_FLAGS(SUPP),
		INIT_FEATURE_FLAGS(SAFE_SET),
		INIT_FEATURE_FLAGS(SAFE_CLEAR)
	};

	return copy_to_user(arg, features, sizeof(features)) ? -EFAULT : 0;
}

4639 4640
/*
 * BTRFS_IOC_GET_FEATURES handler: report the compat, compat_ro and
 * incompat flags currently stored in the in-memory super block copy.
 *
 * @fs_info: filesystem to query
 * @arg:     user buffer receiving a struct btrfs_ioctl_feature_flags
 *
 * Return: 0 on success, -EFAULT if the user buffer cannot be written.
 */
static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
					void __user *arg)
{
	struct btrfs_super_block *sb_copy = fs_info->super_copy;
	struct btrfs_ioctl_feature_flags current_flags;

	current_flags.compat_flags = btrfs_super_compat_flags(sb_copy);
	current_flags.compat_ro_flags = btrfs_super_compat_ro_flags(sb_copy);
	current_flags.incompat_flags = btrfs_super_incompat_flags(sb_copy);

	return copy_to_user(arg, &current_flags, sizeof(current_flags)) ?
	       -EFAULT : 0;
}

4655
/*
 * Check whether a requested change to one class of feature bits is allowed.
 *
 * @fs_info:         filesystem, used for the warning messages
 * @set:             feature class, used to name bits in the messages
 * @change_mask:     bits the caller wants to change
 * @flags:           requested values for the bits in @change_mask
 * @supported_flags: bits this kernel supports
 * @safe_set:        bits that may be set
 * @safe_clear:      bits that may be cleared
 *
 * Each failure logs a warning naming the offending bits, falling back to
 * the raw hex mask when btrfs_printable_features() returns NULL.
 *
 * Return: 0 if the change is allowed, -EOPNOTSUPP if it sets a bit outside
 * @supported_flags, -EPERM if it sets a bit outside @safe_set or clears a
 * bit outside @safe_clear.
 */
static int check_feature_bits(struct btrfs_fs_info *fs_info,
			      enum btrfs_feature_set set,
			      u64 change_mask, u64 flags, u64 supported_flags,
			      u64 safe_set, u64 safe_clear)
{
	const char *type = btrfs_feature_set_name(set);
	const u64 to_set = flags & change_mask;
	const u64 to_clear = ~flags & change_mask;
	u64 bad;
	char *names;

	/* Bits to be set that this kernel does not know about. */
	bad = to_set & ~supported_flags;
	if (bad) {
		names = btrfs_printable_features(set, bad);
		if (!names) {
			btrfs_warn(fs_info,
				   "this kernel does not support %s bits 0x%llx",
				   type, bad);
		} else {
			btrfs_warn(fs_info,
				   "this kernel does not support the %s feature bit%s",
				   names, strchr(names, ',') ? "s" : "");
			kfree(names);
		}
		return -EOPNOTSUPP;
	}

	/* Bits to be set that are not in the safe-to-set mask. */
	bad = to_set & ~safe_set;
	if (bad) {
		names = btrfs_printable_features(set, bad);
		if (!names) {
			btrfs_warn(fs_info,
				   "can't set %s bits 0x%llx while mounted",
				   type, bad);
		} else {
			btrfs_warn(fs_info,
				   "can't set the %s feature bit%s while mounted",
				   names, strchr(names, ',') ? "s" : "");
			kfree(names);
		}
		return -EPERM;
	}

	/* Bits to be cleared that are not in the safe-to-clear mask. */
	bad = to_clear & ~safe_clear;
	if (bad) {
		names = btrfs_printable_features(set, bad);
		if (!names) {
			btrfs_warn(fs_info,
				   "can't clear %s bits 0x%llx while mounted",
				   type, bad);
		} else {
			btrfs_warn(fs_info,
				   "can't clear the %s feature bit%s while mounted",
				   names, strchr(names, ',') ? "s" : "");
			kfree(names);
		}
		return -EPERM;
	}

	return 0;
}

4714 4715
/*
 * Validate a requested change for one feature class: expands to a
 * check_feature_bits() call with that class's FEAT_<mask_base> identifier
 * and its BTRFS_FEATURE_<mask_base>_{SUPP,SAFE_SET,SAFE_CLEAR} masks.
 */
#define check_feature(fs_info, change_mask, flags, mask_base)		\
	check_feature_bits(fs_info, FEAT_##mask_base,			\
			   change_mask, flags,				\
			   BTRFS_FEATURE_ ## mask_base ## _SUPP,	\
			   BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,	\
			   BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)

/*
 * BTRFS_IOC_SET_FEATURES handler: set and clear feature bits in the
 * in-memory super block copy and commit a transaction.
 *
 * @file: open file on the filesystem being modified
 * @arg:  user pointer to two struct btrfs_ioctl_feature_flags; the first
 *        holds the masks of bits to change, the second the requested
 *        values for those bits
 *
 * Every class of flags is validated with check_feature() before anything
 * is modified.  An all-zero change mask is a successful no-op.
 *
 * Return: 0 on success, -EPERM without CAP_SYS_ADMIN, -EFAULT if the
 * arguments cannot be read, the check_feature() error for a disallowed
 * change, or another negative errno.
 */
static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_super_block *sb_copy = fs_info->super_copy;
	struct btrfs_ioctl_feature_flags flags[2];
	const struct btrfs_ioctl_feature_flags *mask = &flags[0];
	const struct btrfs_ioctl_feature_flags *want = &flags[1];
	struct btrfs_trans_handle *trans;
	u64 updated;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(flags, arg, sizeof(flags)))
		return -EFAULT;

	/* Nothing to do */
	if (!(mask->compat_flags || mask->compat_ro_flags ||
	      mask->incompat_flags))
		return 0;

	ret = check_feature(fs_info, mask->compat_flags,
			    want->compat_flags, COMPAT);
	if (ret)
		return ret;

	ret = check_feature(fs_info, mask->compat_ro_flags,
			    want->compat_ro_flags, COMPAT_RO);
	if (ret)
		return ret;

	ret = check_feature(fs_info, mask->incompat_flags,
			    want->incompat_flags, INCOMPAT);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto drop_write;
	}

	/*
	 * For each class: OR in the masked bits that are requested set, then
	 * clear the masked bits that are requested clear.
	 */
	spin_lock(&fs_info->super_lock);

	updated = btrfs_super_compat_flags(sb_copy);
	updated |= mask->compat_flags & want->compat_flags;
	updated &= ~(mask->compat_flags & ~want->compat_flags);
	btrfs_set_super_compat_flags(sb_copy, updated);

	updated = btrfs_super_compat_ro_flags(sb_copy);
	updated |= mask->compat_ro_flags & want->compat_ro_flags;
	updated &= ~(mask->compat_ro_flags & ~want->compat_ro_flags);
	btrfs_set_super_compat_ro_flags(sb_copy, updated);

	updated = btrfs_super_incompat_flags(sb_copy);
	updated |= mask->incompat_flags & want->incompat_flags;
	updated &= ~(mask->incompat_flags & ~want->incompat_flags);
	btrfs_set_super_incompat_flags(sb_copy, updated);

	spin_unlock(&fs_info->super_lock);

	ret = btrfs_commit_transaction(trans);

drop_write:
	mnt_drop_write_file(file);
	return ret;
}

4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825
/*
 * Fetch the send arguments from user space and call btrfs_ioctl_send().
 *
 * @file:   open file the request is issued on
 * @argp:   user pointer to the arguments
 * @compat: true if @argp points to a struct btrfs_ioctl_send_args_32 that
 *          must be converted, false for a struct btrfs_ioctl_send_args
 *
 * The compat layout is only handled when both CONFIG_64BIT and
 * CONFIG_COMPAT are defined; otherwise a compat request fails with -ENOTTY.
 *
 * Return: the result of btrfs_ioctl_send(), or a negative errno if the
 * arguments could not be obtained.
 */
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
	struct btrfs_ioctl_send_args *arg;
	int ret;

	if (!compat) {
		arg = memdup_user(argp, sizeof(*arg));
		if (IS_ERR(arg))
			return PTR_ERR(arg);
	} else {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
		struct btrfs_ioctl_send_args_32 args32;

		if (copy_from_user(&args32, argp, sizeof(args32)))
			return -EFAULT;

		arg = kzalloc(sizeof(*arg), GFP_KERNEL);
		if (!arg)
			return -ENOMEM;

		/* Widen the 32-bit layout; the pointer goes via compat_ptr(). */
		arg->send_fd = args32.send_fd;
		arg->clone_sources_count = args32.clone_sources_count;
		arg->clone_sources = compat_ptr(args32.clone_sources);
		arg->parent_root = args32.parent_root;
		arg->flags = args32.flags;
		memcpy(arg->reserved, args32.reserved,
		       sizeof(args32.reserved));
#else
		return -ENOTTY;
#endif
	}

	ret = btrfs_ioctl_send(file, arg);
	kfree(arg);
	return ret;
}

C
Christoph Hellwig 已提交
4826 4827 4828
long btrfs_ioctl(struct file *file, unsigned int
		cmd, unsigned long arg)
{
4829 4830 4831
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
4832
	void __user *argp = (void __user *)arg;
C
Christoph Hellwig 已提交
4833 4834

	switch (cmd) {
4835 4836 4837 4838 4839 4840
	case FS_IOC_GETFLAGS:
		return btrfs_ioctl_getflags(file, argp);
	case FS_IOC_SETFLAGS:
		return btrfs_ioctl_setflags(file, argp);
	case FS_IOC_GETVERSION:
		return btrfs_ioctl_getversion(file, argp);
4841
	case FS_IOC_GETFSLABEL:
4842
		return btrfs_ioctl_get_fslabel(fs_info, argp);
4843 4844
	case FS_IOC_SETFSLABEL:
		return btrfs_ioctl_set_fslabel(file, argp);
4845
	case FITRIM:
4846
		return btrfs_ioctl_fitrim(fs_info, argp);
C
Christoph Hellwig 已提交
4847
	case BTRFS_IOC_SNAP_CREATE:
4848
		return btrfs_ioctl_snap_create(file, argp, 0);
4849
	case BTRFS_IOC_SNAP_CREATE_V2:
4850
		return btrfs_ioctl_snap_create_v2(file, argp, 0);
4851
	case BTRFS_IOC_SUBVOL_CREATE:
4852
		return btrfs_ioctl_snap_create(file, argp, 1);
A
Arne Jansen 已提交
4853 4854
	case BTRFS_IOC_SUBVOL_CREATE_V2:
		return btrfs_ioctl_snap_create_v2(file, argp, 1);
4855
	case BTRFS_IOC_SNAP_DESTROY:
4856 4857 4858
		return btrfs_ioctl_snap_destroy(file, argp, false);
	case BTRFS_IOC_SNAP_DESTROY_V2:
		return btrfs_ioctl_snap_destroy(file, argp, true);
4859 4860 4861 4862
	case BTRFS_IOC_SUBVOL_GETFLAGS:
		return btrfs_ioctl_subvol_getflags(file, argp);
	case BTRFS_IOC_SUBVOL_SETFLAGS:
		return btrfs_ioctl_subvol_setflags(file, argp);
4863 4864
	case BTRFS_IOC_DEFAULT_SUBVOL:
		return btrfs_ioctl_default_subvol(file, argp);
C
Christoph Hellwig 已提交
4865
	case BTRFS_IOC_DEFRAG:
C
Chris Mason 已提交
4866 4867 4868
		return btrfs_ioctl_defrag(file, NULL);
	case BTRFS_IOC_DEFRAG_RANGE:
		return btrfs_ioctl_defrag(file, argp);
C
Christoph Hellwig 已提交
4869
	case BTRFS_IOC_RESIZE:
4870
		return btrfs_ioctl_resize(file, argp);
C
Christoph Hellwig 已提交
4871
	case BTRFS_IOC_ADD_DEV:
4872
		return btrfs_ioctl_add_dev(fs_info, argp);
C
Christoph Hellwig 已提交
4873
	case BTRFS_IOC_RM_DEV:
4874
		return btrfs_ioctl_rm_dev(file, argp);
4875 4876
	case BTRFS_IOC_RM_DEV_V2:
		return btrfs_ioctl_rm_dev_v2(file, argp);
J
Jan Schmidt 已提交
4877
	case BTRFS_IOC_FS_INFO:
4878
		return btrfs_ioctl_fs_info(fs_info, argp);
J
Jan Schmidt 已提交
4879
	case BTRFS_IOC_DEV_INFO:
4880
		return btrfs_ioctl_dev_info(fs_info, argp);
C
Christoph Hellwig 已提交
4881
	case BTRFS_IOC_BALANCE:
4882
		return btrfs_ioctl_balance(file, NULL);
4883 4884
	case BTRFS_IOC_TREE_SEARCH:
		return btrfs_ioctl_tree_search(file, argp);
G
Gerhard Heift 已提交
4885 4886
	case BTRFS_IOC_TREE_SEARCH_V2:
		return btrfs_ioctl_tree_search_v2(file, argp);
4887 4888
	case BTRFS_IOC_INO_LOOKUP:
		return btrfs_ioctl_ino_lookup(file, argp);
4889 4890 4891
	case BTRFS_IOC_INO_PATHS:
		return btrfs_ioctl_ino_to_path(root, argp);
	case BTRFS_IOC_LOGICAL_INO:
4892 4893 4894
		return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
	case BTRFS_IOC_LOGICAL_INO_V2:
		return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
J
Josef Bacik 已提交
4895
	case BTRFS_IOC_SPACE_INFO:
4896
		return btrfs_ioctl_space_info(fs_info, argp);
4897 4898 4899
	case BTRFS_IOC_SYNC: {
		int ret;

4900
		ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
4901 4902
		if (ret)
			return ret;
4903
		ret = btrfs_sync_fs(inode->i_sb, 1);
4904 4905
		/*
		 * The transaction thread may want to do more work,
4906
		 * namely it pokes the cleaner kthread that will start
4907 4908
		 * processing uncleaned subvols.
		 */
4909
		wake_up_process(fs_info->transaction_kthread);
4910 4911
		return ret;
	}
4912
	case BTRFS_IOC_START_SYNC:
4913
		return btrfs_ioctl_start_sync(root, argp);
4914
	case BTRFS_IOC_WAIT_SYNC:
4915
		return btrfs_ioctl_wait_sync(fs_info, argp);
J
Jan Schmidt 已提交
4916
	case BTRFS_IOC_SCRUB:
M
Miao Xie 已提交
4917
		return btrfs_ioctl_scrub(file, argp);
J
Jan Schmidt 已提交
4918
	case BTRFS_IOC_SCRUB_CANCEL:
4919
		return btrfs_ioctl_scrub_cancel(fs_info);
J
Jan Schmidt 已提交
4920
	case BTRFS_IOC_SCRUB_PROGRESS:
4921
		return btrfs_ioctl_scrub_progress(fs_info, argp);
4922
	case BTRFS_IOC_BALANCE_V2:
4923
		return btrfs_ioctl_balance(file, argp);
4924
	case BTRFS_IOC_BALANCE_CTL:
4925
		return btrfs_ioctl_balance_ctl(fs_info, arg);
4926
	case BTRFS_IOC_BALANCE_PROGRESS:
4927
		return btrfs_ioctl_balance_progress(fs_info, argp);
4928 4929
	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
		return btrfs_ioctl_set_received_subvol(file, argp);
4930 4931 4932 4933
#ifdef CONFIG_64BIT
	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
		return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
4934
	case BTRFS_IOC_SEND:
4935 4936 4937 4938 4939
		return _btrfs_ioctl_send(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
	case BTRFS_IOC_SEND_32:
		return _btrfs_ioctl_send(file, argp, true);
#endif
4940
	case BTRFS_IOC_GET_DEV_STATS:
4941
		return btrfs_ioctl_get_dev_stats(fs_info, argp);
A
Arne Jansen 已提交
4942
	case BTRFS_IOC_QUOTA_CTL:
4943
		return btrfs_ioctl_quota_ctl(file, argp);
A
Arne Jansen 已提交
4944
	case BTRFS_IOC_QGROUP_ASSIGN:
4945
		return btrfs_ioctl_qgroup_assign(file, argp);
A
Arne Jansen 已提交
4946
	case BTRFS_IOC_QGROUP_CREATE:
4947
		return btrfs_ioctl_qgroup_create(file, argp);
A
Arne Jansen 已提交
4948
	case BTRFS_IOC_QGROUP_LIMIT:
4949
		return btrfs_ioctl_qgroup_limit(file, argp);
J
Jan Schmidt 已提交
4950 4951 4952
	case BTRFS_IOC_QUOTA_RESCAN:
		return btrfs_ioctl_quota_rescan(file, argp);
	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
4953
		return btrfs_ioctl_quota_rescan_status(fs_info, argp);
4954
	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
4955
		return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
4956
	case BTRFS_IOC_DEV_REPLACE:
4957
		return btrfs_ioctl_dev_replace(fs_info, argp);
4958
	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
4959
		return btrfs_ioctl_get_supported_features(argp);
4960
	case BTRFS_IOC_GET_FEATURES:
4961
		return btrfs_ioctl_get_features(fs_info, argp);
4962 4963
	case BTRFS_IOC_SET_FEATURES:
		return btrfs_ioctl_set_features(file, argp);
4964 4965
	case FS_IOC_FSGETXATTR:
		return btrfs_ioctl_fsgetxattr(file, argp);
4966 4967
	case FS_IOC_FSSETXATTR:
		return btrfs_ioctl_fssetxattr(file, argp);
4968 4969
	case BTRFS_IOC_GET_SUBVOL_INFO:
		return btrfs_ioctl_get_subvol_info(file, argp);
4970 4971
	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
		return btrfs_ioctl_get_subvol_rootref(file, argp);
4972 4973
	case BTRFS_IOC_INO_LOOKUP_USER:
		return btrfs_ioctl_ino_lookup_user(file, argp);
C
Christoph Hellwig 已提交
4974 4975 4976 4977
	}

	return -ENOTTY;
}
4978 4979 4980 4981

#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: translate the FS_IOC32_* command numbers to
 * their FS_IOC_* counterparts and forward to btrfs_ioctl() with the
 * argument converted through compat_ptr().
 *
 * @file: open file the ioctl was issued on
 * @cmd:  ioctl command number as issued by the caller
 * @arg:  raw compat argument
 *
 * Commands other than the three translated here are forwarded unchanged.
 *
 * Return: the result of btrfs_ioctl().
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/*
	 * These all access 32-bit values anyway so no further
	 * handling is necessary.
	 */
	if (cmd == FS_IOC32_GETFLAGS)
		cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		cmd = FS_IOC_SETFLAGS;
	else if (cmd == FS_IOC32_GETVERSION)
		cmd = FS_IOC_GETVERSION;

	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif