genhd.c 33.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3
/*
 *  gendisk handling
C
Christoph Hellwig 已提交
4 5
 *
 * Portions Copyright (C) 2020 Christoph Hellwig
L
Linus Torvalds 已提交
6 7 8
 */

#include <linux/module.h>
9
#include <linux/ctype.h>
L
Linus Torvalds 已提交
10 11
#include <linux/fs.h>
#include <linux/genhd.h>
12
#include <linux/kdev_t.h>
L
Linus Torvalds 已提交
13 14
#include <linux/kernel.h>
#include <linux/blkdev.h>
15
#include <linux/backing-dev.h>
L
Linus Torvalds 已提交
16 17
#include <linux/init.h>
#include <linux/spinlock.h>
18
#include <linux/proc_fs.h>
L
Linus Torvalds 已提交
19 20 21
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
22
#include <linux/mutex.h>
T
Tejun Heo 已提交
23
#include <linux/idr.h>
24
#include <linux/log2.h>
25
#include <linux/pm_runtime.h>
26
#include <linux/badblocks.h>
L
Linus Torvalds 已提交
27

28 29
#include "blk.h"

30
/*
 * Kobject used as the target directory for the per-disk links created in
 * register_disk(); set up by genhd_device_init() when sysfs_deprecated is
 * not set.
 */
static struct kobject *block_depr;
L
Linus Torvalds 已提交
31

T
Tejun Heo 已提交
32
/*
 * Extended dynamic devt allocation.  Only a single major is used for it at
 * the moment, so the ID space covers exactly one major's worth of minors.
 */
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
T
Tejun Heo 已提交
35

36 37
/*
 * Store a new capacity, expressed in sectors, for @disk.  The size of the
 * whole-disk block device inode is written in bytes under bd_size_lock.
 */
void set_capacity(struct gendisk *disk, sector_t sectors)
{
	struct block_device *whole = disk->part0;
	loff_t bytes = (loff_t)sectors << SECTOR_SHIFT;

	spin_lock(&whole->bd_size_lock);
	i_size_write(whole->bd_inode, bytes);
	spin_unlock(&whole->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);

46
/*
47 48
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero.  Returns true if a uevent was sent, otherwise false.
49
 */
50
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
51 52
{
	sector_t capacity = get_capacity(disk);
53
	char *envp[] = { "RESIZE=1", NULL };
54 55 56

	set_capacity(disk, size);

57 58 59 60 61 62 63 64
	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive.  This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
		return false;
65

66
	pr_info("%s: detected capacity change from %lld to %lld\n",
M
Ming Lei 已提交
67
		disk->disk_name, capacity, size);
68

69 70 71 72 73 74 75 76
	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
77
}
78
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/*
 * Format the device name of the indicated disk into the supplied buffer and
 * return a pointer to that same buffer for convenience.
 *
 * @hd:     disk whose disk_name is used as the base name
 * @partno: partition number; 0 selects the bare disk name
 * @buf:    output buffer, written with at most BDEVNAME_SIZE bytes
 *
 * For a non-zero @partno a 'p' separator is inserted when the disk name ends
 * in a digit, so the partition number stays distinguishable.
 */
char *disk_name(struct gendisk *hd, int partno, char *buf)
{
	size_t len = strlen(hd->disk_name);

	if (!partno)
		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
	/* guard against an empty name: len - 1 would index before the array */
	else if (len && isdigit(hd->disk_name[len - 1]))
		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
	else
		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);

	return buf;
}

const char *bdevname(struct block_device *bdev, char *buf)
{
98
	return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
99 100
}
EXPORT_SYMBOL(bdevname);
101

102 103
static void part_stat_read_all(struct block_device *part,
		struct disk_stats *stat)
104 105 106 107 108
{
	int cpu;

	memset(stat, 0, sizeof(struct disk_stats));
	for_each_possible_cpu(cpu) {
109
		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
110 111 112 113 114 115 116 117 118 119 120 121 122
		int group;

		for (group = 0; group < NR_STAT_GROUPS; group++) {
			stat->nsecs[group] += ptr->nsecs[group];
			stat->sectors[group] += ptr->sectors[group];
			stat->ios[group] += ptr->ios[group];
			stat->merges[group] += ptr->merges[group];
		}

		stat->io_ticks += ptr->io_ticks;
	}
}

123
static unsigned int part_in_flight(struct block_device *part)
124
{
125
	unsigned int inflight = 0;
126
	int cpu;
127

128
	for_each_possible_cpu(cpu) {
129 130
		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
			    part_stat_local_read_cpu(part, in_flight[1], cpu);
131
	}
132 133
	if ((int)inflight < 0)
		inflight = 0;
134

135
	return inflight;
136 137
}

138 139
static void part_in_flight_rw(struct block_device *part,
		unsigned int inflight[2])
140
{
141 142 143 144 145 146 147 148 149 150 151 152
	int cpu;

	inflight[0] = 0;
	inflight[1] = 0;
	for_each_possible_cpu(cpu) {
		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
	}
	if ((int)inflight[0] < 0)
		inflight[0] = 0;
	if ((int)inflight[1] < 0)
		inflight[1] = 0;
153 154
}

L
Linus Torvalds 已提交
155 156 157 158
/*
 * Can be deleted altogether. Later.
 *
 */
159
#define BLKDEV_MAJOR_HASH_SIZE 255
L
Linus Torvalds 已提交
160 161 162 163
static struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
164
	void (*probe)(dev_t devt);
165
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
C
Christoph Hellwig 已提交
166
static DEFINE_MUTEX(major_names_lock);
L
Linus Torvalds 已提交
167 168

/* index in the above - for now: assume no multimajor ranges */
169
static inline int major_to_index(unsigned major)
L
Linus Torvalds 已提交
170
{
171
	return major % BLKDEV_MAJOR_HASH_SIZE;
172 173
}

174
#ifdef CONFIG_PROC_FS
175
/*
 * Print to @seqf the registered block major whose number equals @offset,
 * if there is one in the corresponding hash bucket.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	entry = major_names[major_to_index(offset)];
	while (entry) {
		if (entry->major == offset)
			seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&major_names_lock);
}
185
#endif /* CONFIG_PROC_FS */
L
Linus Torvalds 已提交
186

187
/**
188
 * __register_blkdev - register a new block device
189
 *
190 191
 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 *         @major = 0, try to allocate any unused major number.
192
 * @name: the name of the new block device as a zero terminated string
193
 * @probe: allback that is called on access to any minor number of @major
194 195 196
 *
 * The @name must be unique within the system.
 *
197 198
 * The return value depends on the @major input parameter:
 *
199 200
 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 *    then the function returns zero on success, or a negative error code
201
 *  - if any unused major number was requested with @major = 0 parameter
202
 *    then the return value is the allocated major number in range
203 204 205 206
 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 *
 * See Documentation/admin-guide/devices.txt for the list of allocated
 * major numbers.
207 208
 *
 * Use register_blkdev instead for any new code.
209
 */
210 211
int __register_blkdev(unsigned int major, const char *name,
		void (*probe)(dev_t devt))
L
Linus Torvalds 已提交
212 213 214 215
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

C
Christoph Hellwig 已提交
216
	mutex_lock(&major_names_lock);
L
Linus Torvalds 已提交
217 218 219 220 221 222 223 224 225

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
226 227
			printk("%s: failed to get major for %s\n",
			       __func__, name);
L
Linus Torvalds 已提交
228 229 230 231 232 233 234
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

235
	if (major >= BLKDEV_MAJOR_MAX) {
236 237
		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
		       __func__, major, BLKDEV_MAJOR_MAX-1, name);
238 239 240 241 242

		ret = -EINVAL;
		goto out;
	}

L
Linus Torvalds 已提交
243 244 245 246 247 248 249
	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
250
	p->probe = probe;
L
Linus Torvalds 已提交
251 252 253 254 255 256 257 258 259 260 261 262 263 264
	strlcpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;

	if (ret < 0) {
265
		printk("register_blkdev: cannot get major %u for %s\n",
L
Linus Torvalds 已提交
266 267 268 269
		       major, name);
		kfree(p);
	}
out:
C
Christoph Hellwig 已提交
270
	mutex_unlock(&major_names_lock);
L
Linus Torvalds 已提交
271 272
	return ret;
}
273
EXPORT_SYMBOL(__register_blkdev);
L
Linus Torvalds 已提交
274

A
Akinobu Mita 已提交
275
void unregister_blkdev(unsigned int major, const char *name)
L
Linus Torvalds 已提交
276 277 278 279 280
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

C
Christoph Hellwig 已提交
281
	mutex_lock(&major_names_lock);
L
Linus Torvalds 已提交
282 283 284
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
285 286 287
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
L
Linus Torvalds 已提交
288 289 290
		p = *n;
		*n = p->next;
	}
C
Christoph Hellwig 已提交
291
	mutex_unlock(&major_names_lock);
L
Linus Torvalds 已提交
292 293 294 295 296
	kfree(p);
}

EXPORT_SYMBOL(unregister_blkdev);

297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * When CONFIG_DEBUG_BLOCK_EXT_DEVT is enabled, mirror the bit order of
 * @minor within MINORBITS so that consecutively allocated minors end up far
 * apart.  Applying the function twice yields the original value.  Without
 * the option @minor is returned unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos, hi_pos;

	for (lo_pos = 0, hi_pos = MINORBITS - 1; lo_pos < MINORBITS / 2;
	     lo_pos++, hi_pos--) {
		int span = hi_pos - lo_pos;
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);

		/* drop both bits, then put each back at the other position */
		minor &= ~(lo_bit | hi_bit);
		minor |= (lo_bit << span) | (hi_bit >> span);
	}
#endif
	return minor;
}

329
int blk_alloc_ext_minor(void)
T
Tejun Heo 已提交
330
{
T
Tejun Heo 已提交
331
	int idx;
T
Tejun Heo 已提交
332

333
	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
334 335 336 337 338 339
	if (idx < 0) {
		if (idx == -ENOSPC)
			return -EBUSY;
		return idx;
	}
	return blk_mangle_minor(idx);
T
Tejun Heo 已提交
340 341
}

342
void blk_free_ext_minor(unsigned int minor)
T
Tejun Heo 已提交
343
{
344
	ida_free(&ext_devt_ida, blk_mangle_minor(minor));
Y
Yufen Yu 已提交
345 346
}

347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Format @devt as hexadecimal text into @buf (at most BDEVT_SIZE bytes) and
 * return @buf.  When both major and minor fit in 8 bits the short "MMmm"
 * form is used, left-aligned in a 9 character field; otherwise the long
 * "MMM:mmmmm" form is used.
 */
static char *bdevt_str(dev_t devt, char *buf)
{
	unsigned int ma = MAJOR(devt);
	unsigned int mi = MINOR(devt);
	char tbuf[BDEVT_SIZE];

	if (ma > 0xff || mi > 0xff) {
		snprintf(buf, BDEVT_SIZE, "%03x:%05x", ma, mi);
	} else {
		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", ma, mi);
		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
	}

	return buf;
}

359 360 361
/*
 * Send a uevent of type @action for every block device in @disk's partition
 * table, skipping partitions that have no sectors and devices whose kobject
 * reference can no longer be taken.
 */
void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
	struct block_device *part;
	unsigned long idx;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		bool empty_partition = bdev_is_partition(part) &&
				       !bdev_nr_sectors(part);

		if (empty_partition ||
		    !kobject_get_unless_zero(&part->bd_device.kobj))
			continue;

		/*
		 * We hold a reference on the device now, so the RCU read
		 * section is left while the uevent is emitted and re-entered
		 * before moving on to the next table entry.
		 */
		rcu_read_unlock();
		kobject_uevent(bdev_kobj(part), action);
		put_device(&part->bd_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(disk_uevent);

380 381 382 383 384 385 386 387 388 389 390 391 392
/*
 * Flag @disk as needing a partition scan and open/close its whole-disk
 * device once for reading.  Nothing is done for disks without capacity or
 * with partition scanning disabled.
 */
static void disk_scan_partitions(struct gendisk *disk)
{
	struct block_device *bdev;

	if (!get_capacity(disk))
		return;
	if (!disk_part_scan_enabled(disk))
		return;

	set_bit(GD_NEED_PART_SCAN, &disk->state);

	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
	if (IS_ERR(bdev))
		return;
	blkdev_put(bdev, FMODE_READ);
}

393 394
static void register_disk(struct device *parent, struct gendisk *disk,
			  const struct attribute_group **groups)
395 396 397 398
{
	struct device *ddev = disk_to_dev(disk);
	int err;

399
	ddev->parent = parent;
400

401
	dev_set_name(ddev, "%s", disk->disk_name);
402 403 404 405

	/* delay uevents, until we scanned partition table */
	dev_set_uevent_suppress(ddev, 1);

406 407 408 409
	if (groups) {
		WARN_ON(ddev->groups);
		ddev->groups = groups;
	}
410 411 412 413 414 415 416 417 418 419
	if (device_add(ddev))
		return;
	if (!sysfs_deprecated) {
		err = sysfs_create_link(block_depr, &ddev->kobj,
					kobject_name(&ddev->kobj));
		if (err) {
			device_del(ddev);
			return;
		}
	}
420 421 422 423 424 425 426 427

	/*
	 * avoid probable deadlock caused by allocating memory with
	 * GFP_KERNEL in runtime_resume callback of its all ancestor
	 * devices
	 */
	pm_runtime_set_memalloc_noio(ddev, true);

428 429
	disk->part0->bd_holder_dir =
		kobject_create_and_add("holders", &ddev->kobj);
430 431
	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);

432
	if (disk->flags & GENHD_FL_HIDDEN)
433 434
		return;

435
	disk_scan_partitions(disk);
436

437
	/* announce the disk and partitions after all partitions are created */
438
	dev_set_uevent_suppress(ddev, 0);
439
	disk_uevent(disk, KOBJ_ADD);
440

441 442 443 444 445 446
	if (disk->queue->backing_dev_info->dev) {
		err = sysfs_create_link(&ddev->kobj,
			  &disk->queue->backing_dev_info->dev->kobj,
			  "bdi");
		WARN_ON(err);
	}
447 448
}

L
Linus Torvalds 已提交
449
/**
450
 * __device_add_disk - add disk information to kernel list
451
 * @parent: parent device for the disk
L
Linus Torvalds 已提交
452
 * @disk: per-device partitioning information
453
 * @groups: Additional per-device sysfs groups
454
 * @register_queue: register the queue if set to true
L
Linus Torvalds 已提交
455 456 457
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
458 459
 *
 * FIXME: error handling
L
Linus Torvalds 已提交
460
 */
461
static void __device_add_disk(struct device *parent, struct gendisk *disk,
462
			      const struct attribute_group **groups,
463
			      bool register_queue)
L
Linus Torvalds 已提交
464
{
465
	int ret;
466

467 468 469 470 471 472 473 474 475
	/*
	 * The disk queue should now be all set with enough information about
	 * the device for the elevator code to pick an adequate default
	 * elevator if one is needed, that is, for devices requesting queue
	 * registration.
	 */
	if (register_queue)
		elevator_init_mq(disk->queue);

476 477 478 479 480 481
	/*
	 * If the driver provides an explicit major number it also must provide
	 * the number of minors numbers supported, and those will be used to
	 * setup the gendisk.
	 * Otherwise just allocate the device numbers for both the whole device
	 * and all partitions from the extended dev_t space.
482
	 */
483 484
	if (disk->major) {
		WARN_ON(!disk->minors);
485 486 487 488 489 490

		if (disk->minors > DISK_MAX_PARTS) {
			pr_err("block: can't allocate more than %d partitions\n",
				DISK_MAX_PARTS);
			disk->minors = DISK_MAX_PARTS;
		}
491 492
	} else {
		WARN_ON(disk->minors);
493

494 495 496 497 498 499 500
		ret = blk_alloc_ext_minor();
		if (ret < 0) {
			WARN_ON(1);
			return;
		}
		disk->major = BLOCK_EXT_MAJOR;
		disk->first_minor = MINOR(ret);
501
		disk->flags |= GENHD_FL_EXT_DEVT;
502
	}
503 504

	disk->flags |= GENHD_FL_UP;
505

506 507
	disk_alloc_events(disk);

508 509 510 511 512 513 514 515
	if (disk->flags & GENHD_FL_HIDDEN) {
		/*
		 * Don't let hidden disks show up in /proc/partitions,
		 * and don't bother scanning for partitions either.
		 */
		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
		disk->flags |= GENHD_FL_NO_PART_SCAN;
	} else {
516 517
		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
		struct device *dev = disk_to_dev(disk);
518

519
		/* Register BDI before referencing it from bdev */
520 521 522
		dev->devt = MKDEV(disk->major, disk->first_minor);
		ret = bdi_register(bdi, "%u:%u",
				   disk->major, disk->first_minor);
523
		WARN_ON(ret);
524
		bdi_set_owner(bdi, dev);
525
		bdev_add(disk->part0, dev->devt);
526
	}
527
	register_disk(parent, disk, groups);
528 529
	if (register_queue)
		blk_register_queue(disk);
530

531 532 533 534
	/*
	 * Take an extra ref on queue which will be put on disk_release()
	 * so that it sticks around as long as @disk is there.
	 */
535 536 537 538
	if (blk_get_queue(disk->queue))
		set_bit(GD_QUEUE_REF, &disk->state);
	else
		WARN_ON_ONCE(1);
539

540
	disk_add_events(disk);
541
	blk_integrity_add(disk);
L
Linus Torvalds 已提交
542
}
543

544 545 546
void device_add_disk(struct device *parent, struct gendisk *disk,
		     const struct attribute_group **groups)

547
{
548
	__device_add_disk(parent, disk, groups, true);
549
}
550
EXPORT_SYMBOL(device_add_disk);
L
Linus Torvalds 已提交
551

552 553
void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
{
554
	__device_add_disk(parent, disk, NULL, false);
555 556 557
}
EXPORT_SYMBOL(device_add_disk_no_queue_reg);

558 559 560 561 562 563 564 565 566 567 568 569 570
/**
 * del_gendisk - remove the gendisk
 * @disk: the struct gendisk to remove
 *
 * Removes the gendisk and all its associated resources. This deletes the
 * partitions associated with the gendisk, and unregisters the associated
 * request_queue.
 *
 * This is the counter to the respective __device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * __device_add_disk() was used.
571 572 573 574 575
 *
 * Drivers exist which depend on the release of the gendisk to be synchronous,
 * it should not be deferred.
 *
 * Context: can sleep
576
 */
577
void del_gendisk(struct gendisk *disk)
L
Linus Torvalds 已提交
578
{
579 580
	might_sleep();

581 582 583
	if (WARN_ON_ONCE(!disk->queue))
		return;

584
	blk_integrity_del(disk);
585 586
	disk_del_events(disk);

587
	mutex_lock(&disk->open_mutex);
588
	remove_inode_hash(disk->part0->bd_inode);
589
	disk->flags &= ~GENHD_FL_UP;
590
	blk_drop_partitions(disk);
591
	mutex_unlock(&disk->open_mutex);
592

593 594 595
	fsync_bdev(disk->part0);
	__invalidate_device(disk->part0, true);

596 597
	set_capacity(disk, 0);

598
	if (!(disk->flags & GENHD_FL_HIDDEN)) {
599
		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
600

601 602 603 604
		/*
		 * Unregister bdi before releasing device numbers (as they can
		 * get reused and we'd get clashes in sysfs).
		 */
605
		bdi_unregister(disk->queue->backing_dev_info);
606
	}
607

608
	blk_unregister_queue(disk);
609

610
	kobject_put(disk->part0->bd_holder_dir);
611 612
	kobject_put(disk->slave_dir);

613
	part_stat_set_all(disk->part0, 0);
614
	disk->part0->bd_stamp = 0;
615 616
	if (!sysfs_deprecated)
		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
617
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
618
	device_del(disk_to_dev(disk));
L
Linus Torvalds 已提交
619
}
620
EXPORT_SYMBOL(del_gendisk);
L
Linus Torvalds 已提交
621

622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
/* sysfs access to bad-blocks list. */

/*
 * Show handler: emit the bad-blocks list of the disk, or just a newline
 * when the disk has no bad-blocks structure attached.
 */
static ssize_t disk_badblocks_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->bb)
		return badblocks_show(disk->bb, page, 0);

	return sprintf(page, "\n");
}

/*
 * Store handler: pass the written text on to badblocks_store().  Returns
 * -ENXIO when the disk has no bad-blocks structure attached.
 */
static ssize_t disk_badblocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *page, size_t len)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->bb)
		return badblocks_store(disk->bb, page, len, 0);

	return -ENXIO;
}

647
/*
 * If the major of @devt is registered with a probe callback, invoke that
 * callback (with major_names_lock held) and return.  Otherwise request the
 * module by its "block-major-<major>-<minor>" alias, and by the plain
 * "block-major-<major>" alias when the first request returns a positive
 * value.
 */
void blk_request_module(dev_t devt)
{
	unsigned int major = MAJOR(devt);
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	for (entry = major_names[major_to_index(major)]; entry;
	     entry = entry->next) {
		if (entry->major != major || !entry->probe)
			continue;

		entry->probe(devt);
		mutex_unlock(&major_names_lock);
		return;
	}
	mutex_unlock(&major_names_lock);

	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
}

667 668 669 670 671 672 673
/*
 * print a full list of all partitions - intended for places where the root
 * filesystem can't be mounted and thus to give the victim some idea of what
 * went wrong
 */
void __init printk_all_partitions(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct block_device *part;
		char devt_buf[BDEVT_SIZE];
		unsigned long idx;

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		/*
		 * Note, unlike /proc/partitions, I am showing the numbers in
		 * hex - the same format as the root= option takes.
		 */
		rcu_read_lock();
		xa_for_each(&disk->part_tbl, idx, part) {
			if (!bdev_nr_sectors(part))
				continue;
			printk("%s%s %10llu %pg %s",
			       bdev_is_partition(part) ? "  " : "",
			       bdevt_str(part->bd_dev, devt_buf),
			       bdev_nr_sectors(part) >> 1, part,
			       part->bd_meta_info ?
					part->bd_meta_info->uuid : "");
			/*
			 * The line above is left open; finish it with
			 * continuation records so the suffix is appended to
			 * the same log line.
			 */
			if (bdev_is_partition(part))
				pr_cont("\n");
			else if (dev->parent && dev->parent->driver)
				pr_cont(" driver: %s\n",
					dev->parent->driver->name);
			else
				pr_cont(" (driver?)\n");
		}
		rcu_read_unlock();
	}
	class_dev_iter_exit(&iter);
}

L
Linus Torvalds 已提交
719 720
#ifdef CONFIG_PROC_FS
/* iterator */
721
/*
 * seq_file start: allocate a class iterator over the disks of block_class,
 * remember it in seqf->private and advance it to the entry at index *pos.
 * Returns the gendisk found there, NULL when the iterator runs out first,
 * or ERR_PTR(-ENOMEM) when the iterator cannot be allocated.
 */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	struct class_dev_iter *iter;
	struct device *dev;
	loff_t remaining;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);

	for (remaining = *pos; ; remaining--) {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
		if (!remaining)
			break;
	}

	return dev_to_disk(dev);
}

742
/*
 * seq_file next: bump *pos and return the next gendisk from the iterator
 * kept in seqf->private, or NULL when there are no more devices.
 */
static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	(*pos)++;
	dev = class_dev_iter_next(seqf->private);

	return dev ? dev_to_disk(dev) : NULL;
}

754
static void disk_seqf_stop(struct seq_file *seqf, void *v)
755
{
756
	struct class_dev_iter *iter = seqf->private;
757

758 759 760 761
	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
762
		seqf->private = NULL;
763
	}
L
Linus Torvalds 已提交
764 765
}

766
/*
 * Start callback for the partitions listing: same as disk_seqf_start(), but
 * additionally prints the column header when a valid entry is returned for
 * position 0.
 */
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *p = disk_seqf_start(seqf, pos);

	if (IS_ERR_OR_NULL(p))
		return p;

	if (*pos == 0)
		seq_puts(seqf, "major minor  #blocks  name\n\n");
	return p;
}

776
static int show_partition(struct seq_file *seqf, void *v)
L
Linus Torvalds 已提交
777 778
{
	struct gendisk *sgp = v;
779
	struct block_device *part;
780
	unsigned long idx;
L
Linus Torvalds 已提交
781 782 783
	char buf[BDEVNAME_SIZE];

	/* Don't show non-partitionable removeable devices or empty devices */
T
Tejun Heo 已提交
784
	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
785
				   (sgp->flags & GENHD_FL_REMOVABLE)))
L
Linus Torvalds 已提交
786 787 788 789
		return 0;
	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
		return 0;

790 791 792 793
	rcu_read_lock();
	xa_for_each(&sgp->part_tbl, idx, part) {
		if (!bdev_nr_sectors(part))
			continue;
794
		seq_printf(seqf, "%4d  %7d %10llu %s\n",
795 796 797
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1,
			   disk_name(sgp, part->bd_partno, buf));
798 799
	}
	rcu_read_unlock();
L
Linus Torvalds 已提交
800 801 802
	return 0;
}

803
static const struct seq_operations partitions_op = {
804 805 806
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
807
	.show	= show_partition
L
Linus Torvalds 已提交
808 809 810 811 812
};
#endif

/*
 * Subsystem init: register block_class, run blk_dev_init(), register the
 * "blkext" major and, unless sysfs_deprecated is set, create the top-level
 * "block" sysfs directory.  Returns the class_register() error, if any,
 * and 0 otherwise.
 */
static int __init genhd_device_init(void)
{
	int error;

	block_class.dev_kobj = sysfs_dev_block_kobj;
	error = class_register(&block_class);
	if (unlikely(error))
		goto out;

	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (!sysfs_deprecated)
		block_depr = kobject_create_and_add("block", NULL);
out:
	return error;
}

subsys_initcall(genhd_device_init);

831 832
static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
L
Linus Torvalds 已提交
833
{
834
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
835

836
	return sprintf(buf, "%d\n", disk->minors);
L
Linus Torvalds 已提交
837 838
}

839 840 841 842 843
/* sysfs "ext_range": print the value of disk_max_parts() in decimal. */
static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", disk_max_parts(dev_to_disk(dev)));
}

847 848
static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
849
{
850
	struct gendisk *disk = dev_to_disk(dev);
851

852 853
	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
854 855
}

856 857 858 859 860 861 862 863 864
/* sysfs "hidden": print 1 if GENHD_FL_HIDDEN is set, else 0. */
static ssize_t disk_hidden_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int hidden = !!(disk->flags & GENHD_FL_HIDDEN);

	return sprintf(buf, "%d\n", hidden);
}

K
Kay Sievers 已提交
865 866 867 868 869
/* sysfs "ro": print 1 if get_disk_ro() reports read-only, else 0. */
static ssize_t disk_ro_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int ro = !!get_disk_ro(disk);

	return sprintf(buf, "%d\n", ro);
}

873 874 875
/* sysfs "size": print the sector count of the block device behind @dev. */
ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);

	return sprintf(buf, "%llu\n", bdev_nr_sectors(bdev));
}

ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
882 883
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
884
	struct disk_stats stat;
885 886
	unsigned int inflight;

887
	part_stat_read_all(bdev, &stat);
888
	if (queue_is_mq(q))
889
		inflight = blk_mq_in_flight(q, bdev);
890
	else
891
		inflight = part_in_flight(bdev);
892

893 894 895 896 897 898 899
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
900 901 902 903 904 905 906 907
		stat.ios[STAT_READ],
		stat.merges[STAT_READ],
		(unsigned long long)stat.sectors[STAT_READ],
		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
		stat.ios[STAT_WRITE],
		stat.merges[STAT_WRITE],
		(unsigned long long)stat.sectors[STAT_WRITE],
		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
908
		inflight,
909
		jiffies_to_msecs(stat.io_ticks),
910 911 912 913 914
		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
				      stat.nsecs[STAT_WRITE] +
				      stat.nsecs[STAT_DISCARD] +
				      stat.nsecs[STAT_FLUSH],
						NSEC_PER_MSEC),
915 916 917 918 919 920
		stat.ios[STAT_DISCARD],
		stat.merges[STAT_DISCARD],
		(unsigned long long)stat.sectors[STAT_DISCARD],
		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
		stat.ios[STAT_FLUSH],
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
921 922 923 924 925
}

ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
926 927
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
928 929
	unsigned int inflight[2];

930
	if (queue_is_mq(q))
931
		blk_mq_in_flight_rw(q, bdev, inflight);
932
	else
933
		part_in_flight_rw(bdev, inflight);
934

935 936 937
	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}

938 939
static ssize_t disk_capability_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
940
{
941 942 943
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%x\n", disk->flags);
944
}
945

946 947 948 949 950 951 952 953 954
static ssize_t disk_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
}

955 956 957 958 959 960
static ssize_t disk_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

961
	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
962 963
}

964 965 966 967 968 969 970 971 972 973 974 975
/*
 * Device attribute definitions for the show/store handlers above.  All are
 * mode 0444 with only a show handler, except "badblocks", which is mode 0644
 * and also has a store handler.
 */
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
976

977
#ifdef CONFIG_FAIL_MAKE_REQUEST
978 979 980
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
981
	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
982 983 984 985 986 987 988 989 990
}

ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
991
		dev_to_bdev(dev)->bd_make_it_fail = i;
992 993 994 995

	return count;
}

996
static struct device_attribute dev_attr_fail =
997
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
998 999
#endif /* CONFIG_FAIL_MAKE_REQUEST */

1000 1001
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
1002
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
1003
#endif
1004 1005 1006

static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
1007
	&dev_attr_ext_range.attr,
1008
	&dev_attr_removable.attr,
1009
	&dev_attr_hidden.attr,
K
Kay Sievers 已提交
1010
	&dev_attr_ro.attr,
1011
	&dev_attr_size.attr,
1012
	&dev_attr_alignment_offset.attr,
1013
	&dev_attr_discard_alignment.attr,
1014 1015
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
1016
	&dev_attr_inflight.attr,
1017
	&dev_attr_badblocks.attr,
1018 1019 1020
	&dev_attr_events.attr,
	&dev_attr_events_async.attr,
	&dev_attr_events_poll_msecs.attr,
1021 1022
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
1023 1024 1025
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
1026 1027 1028 1029
#endif
	NULL
};

1030 1031 1032 1033 1034 1035 1036 1037 1038 1039
static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct gendisk *disk = dev_to_disk(dev);

	if (a == &dev_attr_badblocks.attr && !disk->bb)
		return 0;
	return a->mode;
}

1040 1041
static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
1042
	.is_visible = disk_visible,
1043 1044
};

1045
static const struct attribute_group *disk_attr_groups[] = {
1046 1047
	&disk_attr_group,
	NULL
L
Linus Torvalds 已提交
1048 1049
};

1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
/**
 * disk_release - releases all allocated resources of the gendisk
 * @dev: the device representing this disk
 *
 * This function releases all allocated resources of the gendisk.
 *
 * Drivers which used __device_add_disk() have a gendisk with a request_queue
 * assigned. Since the request_queue sits on top of the gendisk for these
 * drivers we also call blk_put_queue() for them, and we expect the
 * request_queue refcount to reach 0 at this point, and so the request_queue
 * will also be freed prior to the disk.
1061 1062
 *
 * Context: can sleep
1063
 */
1064
static void disk_release(struct device *dev)
L
Linus Torvalds 已提交
1065
{
1066 1067
	struct gendisk *disk = dev_to_disk(dev);

1068 1069
	might_sleep();

1070 1071
	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(MINOR(dev->devt));
1072
	disk_release_events(disk);
L
Linus Torvalds 已提交
1073
	kfree(disk->random);
1074
	xa_destroy(&disk->part_tbl);
1075
	if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
1076
		blk_put_queue(disk->queue);
C
Christoph Hellwig 已提交
1077
	iput(disk->part0->bd_inode);	/* frees the disk */
L
Linus Torvalds 已提交
1078
}
1079 1080
/* Device class named "block"; assigned to each gendisk in __alloc_disk_node(). */
struct class block_class = {
	.name = "block",
};

1083
static char *block_devnode(struct device *dev, umode_t *mode,
1084
			   kuid_t *uid, kgid_t *gid)
1085 1086 1087
{
	struct gendisk *disk = dev_to_disk(dev);

1088 1089
	if (disk->fops->devnode)
		return disk->fops->devnode(disk, mode);
1090 1091 1092
	return NULL;
}

1093
const struct device_type disk_type = {
1094 1095 1096
	.name		= "disk",
	.groups		= disk_attr_groups,
	.release	= disk_release,
1097
	.devnode	= block_devnode,
L
Linus Torvalds 已提交
1098 1099
};

1100
#ifdef CONFIG_PROC_FS
1101 1102 1103 1104 1105 1106 1107 1108
/*
 * aggregate disk stat collector.  Uses the same stats that the sysfs
 * entries do, above, but makes them available through one seq_file.
 *
 * The output looks suspiciously like /proc/partitions with a bunch of
 * extra fields.
 */
static int diskstats_show(struct seq_file *seqf, void *v)
L
Linus Torvalds 已提交
1109 1110
{
	struct gendisk *gp = v;
1111
	struct block_device *hd;
1112
	unsigned int inflight;
1113
	struct disk_stats stat;
1114
	unsigned long idx;
L
Linus Torvalds 已提交
1115 1116

	/*
1117
	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1118
		seq_puts(seqf,	"major minor name"
L
Linus Torvalds 已提交
1119 1120 1121 1122
				"     rio rmerge rsect ruse wio wmerge "
				"wsect wuse running use aveq"
				"\n\n");
	*/
1123

1124 1125 1126 1127
	rcu_read_lock();
	xa_for_each(&gp->part_tbl, idx, hd) {
		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
			continue;
1128
		part_stat_read_all(hd, &stat);
1129
		if (queue_is_mq(gp->queue))
1130
			inflight = blk_mq_in_flight(gp->queue, hd);
1131
		else
1132
			inflight = part_in_flight(hd);
1133

1134
		seq_printf(seqf, "%4d %7d %pg "
1135 1136 1137
			   "%lu %lu %lu %u "
			   "%lu %lu %lu %u "
			   "%u %u %u "
1138 1139 1140
			   "%lu %lu %lu %u "
			   "%lu %u"
			   "\n",
1141
			   MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151
			   stat.ios[STAT_READ],
			   stat.merges[STAT_READ],
			   stat.sectors[STAT_READ],
			   (unsigned int)div_u64(stat.nsecs[STAT_READ],
							NSEC_PER_MSEC),
			   stat.ios[STAT_WRITE],
			   stat.merges[STAT_WRITE],
			   stat.sectors[STAT_WRITE],
			   (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
							NSEC_PER_MSEC),
1152
			   inflight,
1153
			   jiffies_to_msecs(stat.io_ticks),
1154 1155 1156 1157 1158
			   (unsigned int)div_u64(stat.nsecs[STAT_READ] +
						 stat.nsecs[STAT_WRITE] +
						 stat.nsecs[STAT_DISCARD] +
						 stat.nsecs[STAT_FLUSH],
							NSEC_PER_MSEC),
1159 1160 1161 1162 1163 1164 1165 1166
			   stat.ios[STAT_DISCARD],
			   stat.merges[STAT_DISCARD],
			   stat.sectors[STAT_DISCARD],
			   (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
						 NSEC_PER_MSEC),
			   stat.ios[STAT_FLUSH],
			   (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
						 NSEC_PER_MSEC)
1167
			);
L
Linus Torvalds 已提交
1168
	}
1169
	rcu_read_unlock();
1170

L
Linus Torvalds 已提交
1171 1172 1173
	return 0;
}

1174
static const struct seq_operations diskstats_op = {
1175 1176 1177
	.start	= disk_seqf_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
L
Linus Torvalds 已提交
1178 1179
	.show	= diskstats_show
};
1180 1181 1182

/*
 * Create the "diskstats" and "partitions" proc seq_file entries.  The
 * results of proc_create_seq() are not checked; always returns 0.
 */
static int __init proc_genhd_init(void)
{
	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
	proc_create_seq("partitions", 0, NULL, &partitions_op);

	return 0;
}
module_init(proc_genhd_init);
1188
#endif /* CONFIG_PROC_FS */
L
Linus Torvalds 已提交
1189

1190 1191
/*
 * Look up partition @partno in @disk's partition table under the RCU read
 * lock and return its device number, or 0 if there is no such entry.
 */
dev_t part_devt(struct gendisk *disk, u8 partno)
{
	struct block_device *bdev;
	dev_t result;

	rcu_read_lock();
	bdev = xa_load(&disk->part_tbl, partno);
	result = bdev ? bdev->bd_dev : 0;
	rcu_read_unlock();

	return result;
}

1204
dev_t blk_lookup_devt(const char *name, int partno)
1205
{
1206 1207 1208
	dev_t devt = MKDEV(0, 0);
	struct class_dev_iter iter;
	struct device *dev;
1209

1210 1211
	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
1212 1213
		struct gendisk *disk = dev_to_disk(dev);

1214
		if (strcmp(dev_name(dev), name))
1215 1216
			continue;

1217 1218 1219 1220 1221 1222
		if (partno < disk->minors) {
			/* We need to return the right devno, even
			 * if the partition doesn't exist yet.
			 */
			devt = MKDEV(MAJOR(dev->devt),
				     MINOR(dev->devt) + partno);
1223 1224 1225 1226
		} else {
			devt = part_devt(disk, partno);
			if (devt)
				break;
1227
		}
1228
	}
1229
	class_dev_iter_exit(&iter);
1230 1231 1232
	return devt;
}

1233
struct gendisk *__alloc_disk_node(int minors, int node_id)
1234 1235 1236
{
	struct gendisk *disk;

1237
	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1238 1239
	if (!disk)
		return NULL;
1240

1241 1242
	disk->part0 = bdev_alloc(disk, 0);
	if (!disk->part0)
1243 1244
		goto out_free_disk;

1245
	disk->node_id = node_id;
1246
	mutex_init(&disk->open_mutex);
1247 1248 1249
	xa_init(&disk->part_tbl);
	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
		goto out_destroy_part_tbl;
1250 1251 1252 1253 1254 1255

	disk->minors = minors;
	rand_initialize_disk(disk);
	disk_to_dev(disk)->class = &block_class;
	disk_to_dev(disk)->type = &disk_type;
	device_initialize(disk_to_dev(disk));
L
Linus Torvalds 已提交
1256
	return disk;
1257

1258 1259
out_destroy_part_tbl:
	xa_destroy(&disk->part_tbl);
C
Christoph Hellwig 已提交
1260
	iput(disk->part0->bd_inode);
1261 1262 1263
out_free_disk:
	kfree(disk);
	return NULL;
L
Linus Torvalds 已提交
1264
}
1265
EXPORT_SYMBOL(__alloc_disk_node);
L
Linus Torvalds 已提交
1266

1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285
/*
 * Allocate a request_queue and a gendisk (with 0 minors) on @node and attach
 * the queue to the disk.  If the disk allocation fails the queue is cleaned
 * up again.  Returns the disk, or NULL on failure.
 */
struct gendisk *__blk_alloc_disk(int node)
{
	struct request_queue *q = blk_alloc_queue(node);
	struct gendisk *disk;

	if (!q)
		return NULL;

	disk = __alloc_disk_node(0, node);
	if (disk) {
		disk->queue = q;
		return disk;
	}

	blk_cleanup_queue(q);
	return NULL;
}
EXPORT_SYMBOL(__blk_alloc_disk);

1286 1287
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for, may be NULL
 *
 * Drops one reference on the device embedded in @disk.  When the refcount
 * reaches 0, disk_release() is called.  A NULL @disk is ignored.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);

1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
/**
 * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
 * @disk: gendisk to shutdown
 *
 * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
 * the queue DEAD, destroy and put it and the gendisk structure.
 *
 * Context: can sleep
 */
void blk_cleanup_disk(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/* Tear the queue down first, then drop the reference on the disk. */
	blk_cleanup_queue(q);
	put_disk(disk);
}
EXPORT_SYMBOL(blk_cleanup_disk);

1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
/*
 * Send a KOBJ_CHANGE uevent for @gd carrying "DISK_RO=1" when @ro is
 * non-zero and "DISK_RO=0" otherwise.
 */
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
	char *envp[] = { ro ? "DISK_RO=1" : "DISK_RO=0", NULL };

	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
}

1329 1330 1331
/**
 * set_disk_ro - set a gendisk read-only
 * @disk:	gendisk to operate on
 * @read_only:	%true to set the disk read-only, %false set the disk read/write
 *
 * This function is used to indicate whether a given disk device should have its
 * read-only flag set. set_disk_ro() is typically used by device drivers to
 * indicate whether the underlying physical device is write-protected.
 *
 * A uevent is sent only when the GD_READ_ONLY bit actually changes.
 */
void set_disk_ro(struct gendisk *disk, bool read_only)
{
	bool changed;

	if (read_only)
		changed = !test_and_set_bit(GD_READ_ONLY, &disk->state);
	else
		changed = test_and_clear_bit(GD_READ_ONLY, &disk->state);

	if (changed)
		set_disk_ro_uevent(disk, read_only);
}
EXPORT_SYMBOL(set_disk_ro);
int bdev_read_only(struct block_device *bdev)
{
1353
	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
L
Linus Torvalds 已提交
1354 1355
}
EXPORT_SYMBOL(bdev_read_only);