ioctl.c 14.6 KB
Newer Older
1
#include <linux/capability.h>
L
Linus Torvalds 已提交
2
#include <linux/blkdev.h>
3
#include <linux/export.h>
4
#include <linux/gfp.h>
L
Linus Torvalds 已提交
5
#include <linux/blkpg.h>
6
#include <linux/hdreg.h>
L
Linus Torvalds 已提交
7
#include <linux/backing-dev.h>
A
Al Viro 已提交
8
#include <linux/fs.h>
9
#include <linux/blktrace_api.h>
10
#include <linux/pr.h>
L
Linus Torvalds 已提交
11 12 13 14 15 16
#include <asm/uaccess.h>

/*
 * Handle the BLKPG ioctl: add, delete or resize a partition of @bdev.
 *
 * @bdev: device the ioctl was issued on; it must equal bdev->bd_contains,
 *        otherwise -EINVAL is returned.
 * @arg:  user pointer to a struct blkpg_ioctl_arg whose ->data member is in
 *        turn a user pointer to a struct blkpg_partition.
 *
 * p.start and p.length are shifted right by 9, i.e. converted from bytes
 * to 512-byte sectors, before they are used.
 *
 * Returns 0 on success or a negative errno:
 *   -EACCES  caller lacks CAP_SYS_ADMIN
 *   -EFAULT  either user structure could not be copied in
 *   -EINVAL  bad partition number, negative or unrepresentable start/length,
 *            a resize whose start differs from the partition's current
 *            start, or an unknown a.op
 *   -EBUSY   the requested range overlaps another partition, or the
 *            partition to delete still has openers
 *   -ENXIO   the partition to delete/resize does not exist
 *   -ENOMEM  bdget() failed for the partition
 * For BLKPG_ADD_PARTITION the result of add_partition() is propagated via
 * PTR_ERR_OR_ZERO().
 */
static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
{
	struct block_device *bdevp;
	struct gendisk *disk;
	struct hd_struct *part, *lpart;
	struct blkpg_ioctl_arg a;
	struct blkpg_partition p;
	struct disk_part_iter piter;
	long long start, length;
	int partno;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
		return -EFAULT;
	if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
		return -EFAULT;
	disk = bdev->bd_disk;
	if (bdev != bdev->bd_contains)
		return -EINVAL;
	partno = p.pno;
	if (partno <= 0)
		return -EINVAL;

	switch (a.op) {
	case BLKPG_ADD_PARTITION:
		start = p.start >> 9;
		length = p.length >> 9;
		/*
		 * Negative values are never valid.  They must be rejected on
		 * every configuration: the overlap test below compares against
		 * the partition's sector fields, and a negative value would
		 * not behave as a small number in that comparison.
		 */
		if (start < 0 || length < 0)
			return -EINVAL;
		/* check for fit in a hd_struct */
		if (sizeof(sector_t) == sizeof(long) &&
		    sizeof(long long) > sizeof(long)) {
			long pstart = start, plength = length;

			if (pstart != start || plength != length ||
			    partno > 65535)
				return -EINVAL;
		}

		mutex_lock(&bdev->bd_mutex);

		/* overlap? */
		disk_part_iter_init(&piter, disk,
				    DISK_PITER_INCL_EMPTY);
		while ((part = disk_part_iter_next(&piter))) {
			if (!(start + length <= part->start_sect ||
			      start >= part->start_sect + part->nr_sects)) {
				disk_part_iter_exit(&piter);
				mutex_unlock(&bdev->bd_mutex);
				return -EBUSY;
			}
		}
		disk_part_iter_exit(&piter);

		/* all seems OK */
		part = add_partition(disk, partno, start, length,
				     ADDPART_FLAG_NONE, NULL);
		mutex_unlock(&bdev->bd_mutex);
		return PTR_ERR_OR_ZERO(part);

	case BLKPG_DEL_PARTITION:
		part = disk_get_part(disk, partno);
		if (!part)
			return -ENXIO;

		/* only the dev_t is needed; drop the part reference at once */
		bdevp = bdget(part_devt(part));
		disk_put_part(part);
		if (!bdevp)
			return -ENOMEM;

		mutex_lock(&bdevp->bd_mutex);
		if (bdevp->bd_openers) {
			mutex_unlock(&bdevp->bd_mutex);
			bdput(bdevp);
			return -EBUSY;
		}
		/* all seems OK */
		fsync_bdev(bdevp);
		invalidate_bdev(bdevp);

		/* partition mutex is already held: take the disk's nested */
		mutex_lock_nested(&bdev->bd_mutex, 1);
		delete_partition(disk, partno);
		mutex_unlock(&bdev->bd_mutex);
		mutex_unlock(&bdevp->bd_mutex);
		bdput(bdevp);

		return 0;

	case BLKPG_RESIZE_PARTITION:
		start = p.start >> 9;
		/* new length of partition, converted from bytes to sectors */
		length = p.length >> 9;
		/* negative values are never valid, see BLKPG_ADD_PARTITION */
		if (start < 0 || length < 0)
			return -EINVAL;
		/* check for fit in a hd_struct */
		if (sizeof(sector_t) == sizeof(long) &&
		    sizeof(long long) > sizeof(long)) {
			long pstart = start, plength = length;

			if (pstart != start || plength != length)
				return -EINVAL;
		}
		part = disk_get_part(disk, partno);
		if (!part)
			return -ENXIO;
		bdevp = bdget(part_devt(part));
		if (!bdevp) {
			disk_put_part(part);
			return -ENOMEM;
		}
		mutex_lock(&bdevp->bd_mutex);
		mutex_lock_nested(&bdev->bd_mutex, 1);
		/* a resize may only change the length, never the start */
		if (start != part->start_sect) {
			mutex_unlock(&bdevp->bd_mutex);
			mutex_unlock(&bdev->bd_mutex);
			bdput(bdevp);
			disk_put_part(part);
			return -EINVAL;
		}
		/* overlap? (the partition being resized is skipped) */
		disk_part_iter_init(&piter, disk,
				    DISK_PITER_INCL_EMPTY);
		while ((lpart = disk_part_iter_next(&piter))) {
			if (lpart->partno != partno &&
			    !(start + length <= lpart->start_sect ||
			      start >= lpart->start_sect + lpart->nr_sects)) {
				disk_part_iter_exit(&piter);
				mutex_unlock(&bdevp->bd_mutex);
				mutex_unlock(&bdev->bd_mutex);
				bdput(bdevp);
				disk_put_part(part);
				return -EBUSY;
			}
		}
		disk_part_iter_exit(&piter);
		/* sector count goes to the partition, byte size to the inode */
		part_nr_sects_write(part, (sector_t)length);
		i_size_write(bdevp->bd_inode, p.length);
		mutex_unlock(&bdevp->bd_mutex);
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdevp);
		disk_put_part(part);
		return 0;

	default:
		return -EINVAL;
	}
}

154 155 156 157 158 159
/*
 * This is an exported API for the block driver, and will not
 * acquire bd_mutex. This API should be used in case that
 * caller has held bd_mutex already.
 */
int __blkdev_reread_part(struct block_device *bdev)
L
Linus Torvalds 已提交
160 161 162
{
	struct gendisk *disk = bdev->bd_disk;

T
Tejun Heo 已提交
163
	if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
L
Linus Torvalds 已提交
164 165 166
		return -EINVAL;
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
167 168 169 170 171 172 173 174 175 176 177

	lockdep_assert_held(&bdev->bd_mutex);

	return rescan_partitions(disk, bdev);
}
EXPORT_SYMBOL(__blkdev_reread_part);

/*
 * This is an exported API for the block driver, and will
 * try to acquire bd_mutex. If bd_mutex has been held already
 * in current context, please call __blkdev_reread_part().
178 179 180 181 182 183
 *
 * Make sure the held locks in current context aren't required
 * in open()/close() handler and I/O path for avoiding ABBA deadlock:
 * - bd_mutex is held before calling block driver's open/close
 *   handler
 * - reading partition table may submit I/O to the block device
184 185 186 187 188
 */
int blkdev_reread_part(struct block_device *bdev)
{
	int res;

189
	mutex_lock(&bdev->bd_mutex);
190
	res = __blkdev_reread_part(bdev);
191
	mutex_unlock(&bdev->bd_mutex);
192

L
Linus Torvalds 已提交
193 194
	return res;
}
195
EXPORT_SYMBOL(blkdev_reread_part);
L
Linus Torvalds 已提交
196

C
Christoph Hellwig 已提交
197 198
/*
 * Handle BLKDISCARD / BLKSECDISCARD.
 *
 * @bdev:  target block device
 * @mode:  open mode of the caller; FMODE_WRITE is required
 * @arg:   user address of two u64 values: byte offset, then byte length
 * @flags: passed through unchanged to blkdev_issue_discard()
 *
 * Returns -EBADF without FMODE_WRITE, -EFAULT when the range cannot be
 * copied in, -EINVAL when offset or length is not a multiple of 512 or
 * the range extends past the device size, and otherwise the result of
 * blkdev_issue_discard().
 */
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
		unsigned long arg, unsigned long flags)
{
	uint64_t range[2];
	uint64_t start, len;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(range, (void __user *)arg, sizeof(range)))
		return -EFAULT;

	start = range[0];
	len = range[1];

	/* both values must be 512-byte aligned */
	if (start & 511)
		return -EINVAL;
	if (len & 511)
		return -EINVAL;
	/* bytes -> 512-byte sectors */
	start >>= 9;
	len >>= 9;

	if (start + len > (i_size_read(bdev->bd_inode) >> 9))
		return -EINVAL;
	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
}

C
Christoph Hellwig 已提交
224 225
/*
 * Handle BLKZEROOUT.
 *
 * @bdev: target block device
 * @mode: open mode of the caller; FMODE_WRITE is required
 * @arg:  user address of two u64 values: byte offset, then byte length
 *
 * Returns -EBADF without FMODE_WRITE, -EFAULT when the range cannot be
 * copied in, -EINVAL when offset or length is not a multiple of 512 or
 * the range extends past the device size, and otherwise the result of
 * blkdev_issue_zeroout().
 */
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
		unsigned long arg)
{
	uint64_t range[2];
	uint64_t start, len;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(range, (void __user *)arg, sizeof(range)))
		return -EFAULT;

	start = range[0];
	len = range[1];

	/* both values must be 512-byte aligned */
	if (start & 511)
		return -EINVAL;
	if (len & 511)
		return -EINVAL;
	/* bytes -> 512-byte sectors */
	start >>= 9;
	len >>= 9;

	if (start + len > (i_size_read(bdev->bd_inode) >> 9))
		return -EINVAL;

	return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
}

L
Linus Torvalds 已提交
252 253 254 255 256 257 258 259 260 261
static int put_ushort(unsigned long arg, unsigned short val)
{
	return put_user(val, (unsigned short __user *)arg);
}

static int put_int(unsigned long arg, int val)
{
	return put_user(val, (int __user *)arg);
}

M
Martin K. Petersen 已提交
262 263 264 265 266
static int put_uint(unsigned long arg, unsigned int val)
{
	return put_user(val, (unsigned int __user *)arg);
}

L
Linus Torvalds 已提交
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
static int put_long(unsigned long arg, long val)
{
	return put_user(val, (long __user *)arg);
}

static int put_ulong(unsigned long arg, unsigned long val)
{
	return put_user(val, (unsigned long __user *)arg);
}

/* Store @val as a u64 at the user address @arg; returns put_user()'s result. */
static int put_u64(unsigned long arg, u64 val)
{
	u64 __user *uptr = (u64 __user *)arg;

	return put_user(val, uptr);
}

282 283 284 285
/*
 * Forward an ioctl to the block driver that owns @bdev.
 *
 * Calls disk->fops->ioctl() with the arguments unchanged when the driver
 * provides that hook and returns its result; returns -ENOTTY when the
 * driver has no ioctl hook.
 */
int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned cmd, unsigned long arg)
{
	struct gendisk *disk = bdev->bd_disk;

	if (disk->fops->ioctl)
		return disk->fops->ioctl(bdev, mode, cmd, arg);

	return -ENOTTY;
}
/*
 * For the record: _GPL here is only because somebody decided to slap it
 * on the previous export.  Sheer idiocy, since it wasn't copyrightable
 * at all and could be open-coded without any exports by anybody who cares.
 */
EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);

299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
/*
 * Handle IOC_PR_REGISTER: copy a struct pr_registration from user space
 * and hand its keys and flags to the driver's pr_register operation.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP when the driver has no
 * pr_register operation or a flag other than PR_FL_IGNORE_KEY is set,
 * -EFAULT when @arg cannot be read, and otherwise the driver's result.
 */
static int blkdev_pr_register(struct block_device *bdev,
		struct pr_registration __user *arg)
{
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	struct pr_registration req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (pr == NULL || pr->pr_register == NULL)
		return -EOPNOTSUPP;

	if (copy_from_user(&req, arg, sizeof(req)) != 0)
		return -EFAULT;

	/* PR_FL_IGNORE_KEY is the only flag accepted here */
	if ((req.flags & ~PR_FL_IGNORE_KEY) != 0)
		return -EOPNOTSUPP;

	return pr->pr_register(bdev, req.old_key, req.new_key, req.flags);
}

/*
 * Handle IOC_PR_RESERVE: copy a struct pr_reservation from user space and
 * hand its key, type and flags to the driver's pr_reserve operation.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP when the driver has no
 * pr_reserve operation or a flag other than PR_FL_IGNORE_KEY is set,
 * -EFAULT when @arg cannot be read, and otherwise the driver's result.
 */
static int blkdev_pr_reserve(struct block_device *bdev,
		struct pr_reservation __user *arg)
{
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	struct pr_reservation req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (pr == NULL || pr->pr_reserve == NULL)
		return -EOPNOTSUPP;

	if (copy_from_user(&req, arg, sizeof(req)) != 0)
		return -EFAULT;

	/* PR_FL_IGNORE_KEY is the only flag accepted here */
	if ((req.flags & ~PR_FL_IGNORE_KEY) != 0)
		return -EOPNOTSUPP;

	return pr->pr_reserve(bdev, req.key, req.type, req.flags);
}

/*
 * Handle IOC_PR_RELEASE: copy a struct pr_reservation from user space and
 * hand its key and type to the driver's pr_release operation.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP when the driver has no
 * pr_release operation or any flag is set, -EFAULT when @arg cannot be
 * read, and otherwise the driver's result.
 */
static int blkdev_pr_release(struct block_device *bdev,
		struct pr_reservation __user *arg)
{
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	struct pr_reservation req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (pr == NULL || pr->pr_release == NULL)
		return -EOPNOTSUPP;

	if (copy_from_user(&req, arg, sizeof(req)) != 0)
		return -EFAULT;

	/* no flags are accepted for this command */
	if (req.flags != 0)
		return -EOPNOTSUPP;

	return pr->pr_release(bdev, req.key, req.type);
}

/*
 * Handle IOC_PR_PREEMPT and IOC_PR_PREEMPT_ABORT: copy a struct pr_preempt
 * from user space and hand its keys and type, together with @abort, to the
 * driver's pr_preempt operation.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP when the driver has no
 * pr_preempt operation or any flag is set, -EFAULT when @arg cannot be
 * read, and otherwise the driver's result.
 */
static int blkdev_pr_preempt(struct block_device *bdev,
		struct pr_preempt __user *arg, bool abort)
{
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	struct pr_preempt req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (pr == NULL || pr->pr_preempt == NULL)
		return -EOPNOTSUPP;

	if (copy_from_user(&req, arg, sizeof(req)) != 0)
		return -EFAULT;

	/* no flags are accepted for this command */
	if (req.flags != 0)
		return -EOPNOTSUPP;

	return pr->pr_preempt(bdev, req.old_key, req.new_key, req.type, abort);
}

/*
 * Handle IOC_PR_CLEAR: copy a struct pr_clear from user space and hand its
 * key to the driver's pr_clear operation.
 *
 * Returns -EPERM without CAP_SYS_ADMIN, -EOPNOTSUPP when the driver has no
 * pr_clear operation or any flag is set, -EFAULT when @arg cannot be read,
 * and otherwise the driver's result.
 */
static int blkdev_pr_clear(struct block_device *bdev,
		struct pr_clear __user *arg)
{
	const struct pr_ops *pr = bdev->bd_disk->fops->pr_ops;
	struct pr_clear req;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (pr == NULL || pr->pr_clear == NULL)
		return -EOPNOTSUPP;

	if (copy_from_user(&req, arg, sizeof(req)) != 0)
		return -EFAULT;

	/* no flags are accepted for this command */
	if (req.flags != 0)
		return -EOPNOTSUPP;

	return pr->pr_clear(bdev, req.key);
}

389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
/*
 * Does @ret mean "the driver did not recognize this ioctl"?
 *
 * The proper codes for that are ENOTTY (final answer) and ENOIOCTLCMD
 * ("not mine, try a fallback"); the ioctl code converts ENOIOCTLCMD to
 * ENOTTY before it reaches user space.
 *
 * EINVAL is accepted here as well because confused drivers sometimes
 * return it, although it really means "I understood the command, but its
 * parameters were wrong".  Those drivers should be fixed so that the
 * EINVAL case can go away.
 *
 * Returns 1 for -EINVAL, -ENOTTY and -ENOIOCTLCMD, and 0 for anything else.
 */
static inline int is_unrecognized_ioctl(int ret)
{
	switch (ret) {
	case -EINVAL:
	case -ENOTTY:
	case -ENOIOCTLCMD:
		return 1;
	default:
		return 0;
	}
}

C
Christoph Hellwig 已提交
409 410
/*
 * Handle BLKFLSBUF.
 *
 * Requires CAP_SYS_ADMIN (-EACCES otherwise).  The driver's ioctl hook
 * gets the first chance to handle the command; its result is returned
 * unless it is one of the "unrecognized" codes, in which case the generic
 * fallback runs: fsync_bdev() followed by invalidate_bdev(), returning 0.
 */
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
		unsigned cmd, unsigned long arg)
{
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
	if (!is_unrecognized_ioctl(ret))
		return ret;

	fsync_bdev(bdev);
	invalidate_bdev(bdev);
	return 0;
}
425

C
Christoph Hellwig 已提交
426 427 428 429
/*
 * Handle BLKROSET.
 *
 * The driver's ioctl hook is consulted first, before any capability
 * check; its result is returned unless it is one of the "unrecognized"
 * codes.  The generic fallback requires CAP_SYS_ADMIN (-EACCES
 * otherwise), reads an int from the user address @arg (-EFAULT on
 * failure), passes it to set_device_ro() and returns 0.
 */
static int blkdev_roset(struct block_device *bdev, fmode_t mode,
		unsigned cmd, unsigned long arg)
{
	int ret, n;

	ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
	if (!is_unrecognized_ioctl(ret))
		return ret;
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (get_user(n, (int __user *)arg))
		return -EFAULT;
	set_device_ro(bdev, n);
	return 0;
}
441

C
Christoph Hellwig 已提交
442 443 444 445 446 447
/*
 * Handle HDIO_GETGEO: ask the driver for the disk geometry and copy it to
 * user space.
 *
 * Returns -EINVAL for a NULL @argp, -ENOTTY when the driver has no getgeo
 * hook, the driver's error when getgeo fails, -EFAULT when the result
 * cannot be copied out, and 0 on success.
 */
static int blkdev_getgeo(struct block_device *bdev,
		struct hd_geometry __user *argp)
{
	struct gendisk *disk = bdev->bd_disk;
	struct hd_geometry geo;
	int ret;

	if (!argp)
		return -EINVAL;
	if (!disk->fops->getgeo)
		return -ENOTTY;

	/*
	 * We need to set the startsect first, the driver may
	 * want to override it.
	 */
	memset(&geo, 0, sizeof(geo));
	geo.start = get_start_sect(bdev);
	ret = disk->fops->getgeo(bdev, &geo);
	if (ret)
		return ret;
	if (copy_to_user(argp, &geo, sizeof(geo)))
		return -EFAULT;
	return 0;
}
467

C
Christoph Hellwig 已提交
468 469 470 471 472
/*
 * set the logical block size
 *
 * Handles BLKBSZSET.  Requires CAP_SYS_ADMIN (-EACCES otherwise); @argp
 * must be non-NULL (-EINVAL) and readable (-EFAULT).
 *
 * When the caller's open mode lacks FMODE_EXCL, bdgrab() is called and an
 * exclusive open is taken with blkdev_get() around the set_blocksize()
 * call and released again with blkdev_put(); -EBUSY is returned when that
 * exclusive open fails.  Otherwise the result of set_blocksize() is
 * returned.
 */
static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
		int __user *argp)
{
	int ret, n;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (!argp)
		return -EINVAL;
	if (get_user(n, argp))
		return -EFAULT;

	if (!(mode & FMODE_EXCL)) {
		bdgrab(bdev);
		if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
			return -EBUSY;
	}

	ret = set_blocksize(bdev, n);
	if (!(mode & FMODE_EXCL))
		blkdev_put(bdev, mode | FMODE_EXCL);
	return ret;
}
492

C
Christoph Hellwig 已提交
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
/*
 * always keep this in sync with compat_blkdev_ioctl()
 *
 * Generic block-device ioctl dispatcher.  Commands listed in the switch
 * are handled here or by the helpers they call; any other command is
 * forwarded to the driver through __blkdev_driver_ioctl().
 *
 * @bdev: block device the ioctl was issued on
 * @mode: open mode of the caller
 * @cmd:  ioctl command number
 * @arg:  command argument; a user-space address for most commands, a
 *        plain value for BLKRASET/BLKFRASET
 *
 * Returns the result of the selected handler.
 */
int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
			unsigned long arg)
{
	struct backing_dev_info *bdi;
	void __user *argp = (void __user *)arg;
	loff_t size;
	unsigned int max_sectors;

	switch (cmd) {
	case BLKFLSBUF:
		return blkdev_flushbuf(bdev, mode, cmd, arg);
	case BLKROSET:
		return blkdev_roset(bdev, mode, cmd, arg);
	case BLKDISCARD:
		return blk_ioctl_discard(bdev, mode, arg, 0);
	case BLKSECDISCARD:
		return blk_ioctl_discard(bdev, mode, arg,
				BLKDEV_DISCARD_SECURE);
	case BLKZEROOUT:
		return blk_ioctl_zeroout(bdev, mode, arg);
	case HDIO_GETGEO:
		return blkdev_getgeo(bdev, argp);
	case BLKRAGET:
	case BLKFRAGET:
		if (!arg)
			return -EINVAL;
		bdi = blk_get_backing_dev_info(bdev);
		/* ra_pages is in pages; report it in 512-byte units */
		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
	case BLKROGET:
		return put_int(arg, bdev_read_only(bdev) != 0);
	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
		return put_int(arg, block_size(bdev));
	case BLKSSZGET: /* get block device logical block size */
		return put_int(arg, bdev_logical_block_size(bdev));
	case BLKPBSZGET: /* get block device physical block size */
		return put_uint(arg, bdev_physical_block_size(bdev));
	case BLKIOMIN:
		return put_uint(arg, bdev_io_min(bdev));
	case BLKIOOPT:
		return put_uint(arg, bdev_io_opt(bdev));
	case BLKALIGNOFF:
		return put_int(arg, bdev_alignment_offset(bdev));
	case BLKDISCARDZEROES:
		return put_uint(arg, bdev_discard_zeroes_data(bdev));
	case BLKSECTGET:
		/* the result is an unsigned short, so clamp to USHRT_MAX */
		max_sectors = min_t(unsigned int, USHRT_MAX,
				    queue_max_sectors(bdev_get_queue(bdev)));
		return put_ushort(arg, max_sectors);
	case BLKROTATIONAL:
		return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
	case BLKRASET:
	case BLKFRASET:
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
		bdi = blk_get_backing_dev_info(bdev);
		/* arg is a count of 512-byte units; store it as pages */
		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
		return 0;
	case BLKBSZSET:
		return blkdev_bszset(bdev, mode, argp);
	case BLKPG:
		return blkpg_ioctl(bdev, argp);
	case BLKRRPART:
		return blkdev_reread_part(bdev);
	case BLKGETSIZE:
		size = i_size_read(bdev->bd_inode);
		/* the sector count must fit in the unsigned long result */
		if ((size >> 9) > ~0UL)
			return -EFBIG;
		return put_ulong(arg, size >> 9);
	case BLKGETSIZE64:
		return put_u64(arg, i_size_read(bdev->bd_inode));
	case BLKTRACESTART:
	case BLKTRACESTOP:
	case BLKTRACESETUP:
	case BLKTRACETEARDOWN:
		return blk_trace_ioctl(bdev, cmd, argp);
	case IOC_PR_REGISTER:
		return blkdev_pr_register(bdev, argp);
	case IOC_PR_RESERVE:
		return blkdev_pr_reserve(bdev, argp);
	case IOC_PR_RELEASE:
		return blkdev_pr_release(bdev, argp);
	case IOC_PR_PREEMPT:
		return blkdev_pr_preempt(bdev, argp, false);
	case IOC_PR_PREEMPT_ABORT:
		return blkdev_pr_preempt(bdev, argp, true);
	case IOC_PR_CLEAR:
		return blkdev_pr_clear(bdev, argp);
	default:
		return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
	}
}
587
EXPORT_SYMBOL_GPL(blkdev_ioctl);