genhd.c 33.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2 3
/*
 *  gendisk handling
C
Christoph Hellwig 已提交
4 5
 *
 * Portions Copyright (C) 2020 Christoph Hellwig
L
Linus Torvalds 已提交
6 7 8
 */

#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/sysfs.h>

#include "blk.h"

30
/*
 * kobject of the top-level "block" sysfs directory.  It is created in
 * genhd_device_init() unless sysfs_deprecated is set, and register_disk() /
 * del_gendisk() add and remove per-disk links underneath it.
 */
static struct kobject *block_depr;
L
Linus Torvalds 已提交
31

T
Tejun Heo 已提交
32
/* for extended dynamic devt allocation, currently only one major is used */
33
#define NR_EXT_DEVT		(1 << MINORBITS)
34
/* IDA that blk_alloc_ext_minor() / blk_free_ext_minor() allocate from and release to */
static DEFINE_IDA(ext_devt_ida);
T
Tejun Heo 已提交
35

36 37
/*
 * Record a new capacity for @disk.  @sectors is converted to bytes
 * (sectors << SECTOR_SHIFT) and written as the inode size of the whole-disk
 * block device while holding its bd_size_lock.
 */
void set_capacity(struct gendisk *disk, sector_t sectors)
{
	struct block_device *whole = disk->part0;
	loff_t bytes = (loff_t)sectors << SECTOR_SHIFT;

	spin_lock(&whole->bd_size_lock);
	i_size_write(whole->bd_inode, bytes);
	spin_unlock(&whole->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);

46
/*
47 48
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero.  Returns true if a uevent was sent, otherwise false.
49
 */
50
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
51 52
{
	sector_t capacity = get_capacity(disk);
53
	char *envp[] = { "RESIZE=1", NULL };
54 55 56

	set_capacity(disk, size);

57 58 59 60 61 62 63 64
	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive.  This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
		return false;
65

66
	pr_info("%s: detected capacity change from %lld to %lld\n",
M
Ming Lei 已提交
67
		disk->disk_name, capacity, size);
68

69 70 71 72 73 74 75 76
	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
77
}
78
EXPORT_SYMBOL_GPL(set_capacity_and_notify);
79

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/*
 * Write the name of @hd (partno == 0) or of its partition @partno into @buf,
 * which must hold at least BDEVNAME_SIZE bytes, and return @buf.
 *
 * A 'p' separator is inserted before the partition number when the disk name
 * itself already ends in a digit.
 */
char *disk_name(struct gendisk *hd, int partno, char *buf)
{
	const char *base = hd->disk_name;

	if (partno == 0) {
		snprintf(buf, BDEVNAME_SIZE, "%s", base);
		return buf;
	}

	if (isdigit(base[strlen(base) - 1]))
		snprintf(buf, BDEVNAME_SIZE, "%sp%d", base, partno);
	else
		snprintf(buf, BDEVNAME_SIZE, "%s%d", base, partno);

	return buf;
}

const char *bdevname(struct block_device *bdev, char *buf)
{
98
	return disk_name(bdev->bd_disk, bdev->bd_partno, buf);
99 100
}
EXPORT_SYMBOL(bdevname);
101

102 103
static void part_stat_read_all(struct block_device *part,
		struct disk_stats *stat)
104 105 106 107 108
{
	int cpu;

	memset(stat, 0, sizeof(struct disk_stats));
	for_each_possible_cpu(cpu) {
109
		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
110 111 112 113 114 115 116 117 118 119 120 121 122
		int group;

		for (group = 0; group < NR_STAT_GROUPS; group++) {
			stat->nsecs[group] += ptr->nsecs[group];
			stat->sectors[group] += ptr->sectors[group];
			stat->ios[group] += ptr->ios[group];
			stat->merges[group] += ptr->merges[group];
		}

		stat->io_ticks += ptr->io_ticks;
	}
}

123
static unsigned int part_in_flight(struct block_device *part)
124
{
125
	unsigned int inflight = 0;
126
	int cpu;
127

128
	for_each_possible_cpu(cpu) {
129 130
		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
			    part_stat_local_read_cpu(part, in_flight[1], cpu);
131
	}
132 133
	if ((int)inflight < 0)
		inflight = 0;
134

135
	return inflight;
136 137
}

138 139
static void part_in_flight_rw(struct block_device *part,
		unsigned int inflight[2])
140
{
141 142 143 144 145 146 147 148 149 150 151 152
	int cpu;

	inflight[0] = 0;
	inflight[1] = 0;
	for_each_possible_cpu(cpu) {
		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
	}
	if ((int)inflight[0] < 0)
		inflight[0] = 0;
	if ((int)inflight[1] < 0)
		inflight[1] = 0;
153 154
}

L
Linus Torvalds 已提交
155 156 157 158
/*
 * Can be deleted altogether. Later.
 *
 */
159
#define BLKDEV_MAJOR_HASH_SIZE 255
L
Linus Torvalds 已提交
160 161 162 163
static struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
164
	void (*probe)(dev_t devt);
165
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
C
Christoph Hellwig 已提交
166
/* held around every walk or update of major_names[] in this file */
static DEFINE_MUTEX(major_names_lock);
L
Linus Torvalds 已提交
167 168

/* index in the above - for now: assume no multimajor ranges */
169
static inline int major_to_index(unsigned major)
L
Linus Torvalds 已提交
170
{
171
	return major % BLKDEV_MAJOR_HASH_SIZE;
172 173
}

174
#ifdef CONFIG_PROC_FS
/*
 * Print "<major> <name>" to @seqf for every registered entry whose major
 * equals @offset.  The walk is done under major_names_lock.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	entry = major_names[major_to_index(offset)];
	while (entry) {
		if (entry->major == offset)
			seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&major_names_lock);
}
#endif /* CONFIG_PROC_FS */
L
Linus Torvalds 已提交
186

187
/**
188
 * __register_blkdev - register a new block device
189
 *
190 191
 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 *         @major = 0, try to allocate any unused major number.
192
 * @name: the name of the new block device as a zero terminated string
193
 * @probe: allback that is called on access to any minor number of @major
194 195 196
 *
 * The @name must be unique within the system.
 *
197 198
 * The return value depends on the @major input parameter:
 *
199 200
 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 *    then the function returns zero on success, or a negative error code
201
 *  - if any unused major number was requested with @major = 0 parameter
202
 *    then the return value is the allocated major number in range
203 204 205 206
 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 *
 * See Documentation/admin-guide/devices.txt for the list of allocated
 * major numbers.
207 208
 *
 * Use register_blkdev instead for any new code.
209
 */
210 211
int __register_blkdev(unsigned int major, const char *name,
		void (*probe)(dev_t devt))
L
Linus Torvalds 已提交
212 213 214 215
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

C
Christoph Hellwig 已提交
216
	mutex_lock(&major_names_lock);
L
Linus Torvalds 已提交
217 218 219 220 221 222 223 224 225

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
226 227
			printk("%s: failed to get major for %s\n",
			       __func__, name);
L
Linus Torvalds 已提交
228 229 230 231 232 233 234
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

235
	if (major >= BLKDEV_MAJOR_MAX) {
236 237
		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
		       __func__, major, BLKDEV_MAJOR_MAX-1, name);
238 239 240 241 242

		ret = -EINVAL;
		goto out;
	}

L
Linus Torvalds 已提交
243 244 245 246 247 248 249
	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
250
	p->probe = probe;
L
Linus Torvalds 已提交
251 252 253 254 255 256 257 258 259 260 261 262 263 264
	strlcpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;

	if (ret < 0) {
265
		printk("register_blkdev: cannot get major %u for %s\n",
L
Linus Torvalds 已提交
266 267 268 269
		       major, name);
		kfree(p);
	}
out:
C
Christoph Hellwig 已提交
270
	mutex_unlock(&major_names_lock);
L
Linus Torvalds 已提交
271 272
	return ret;
}
273
EXPORT_SYMBOL(__register_blkdev);
L
Linus Torvalds 已提交
274

A
Akinobu Mita 已提交
275
void unregister_blkdev(unsigned int major, const char *name)
L
Linus Torvalds 已提交
276 277 278 279 280
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

C
Christoph Hellwig 已提交
281
	mutex_lock(&major_names_lock);
L
Linus Torvalds 已提交
282 283 284
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
285 286 287
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
L
Linus Torvalds 已提交
288 289 290
		p = *n;
		*n = p->next;
	}
C
Christoph Hellwig 已提交
291
	mutex_unlock(&major_names_lock);
L
Linus Torvalds 已提交
292 293 294 295 296
	kfree(p);
}

EXPORT_SYMBOL(unregister_blkdev);

297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * When CONFIG_DEBUG_BLOCK_EXT_DEVT is enabled, bit i of @minor is swapped
 * with bit (MINORBITS - 1 - i) for every i in the lower half, which mirrors
 * the value.  Mangling twice gives the original value.  Without that option
 * @minor is returned unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int bit;

	for (bit = 0; bit < MINORBITS / 2; bit++) {
		const int mirror = MINORBITS - 1 - bit;
		const int span = mirror - bit;
		const int lo = minor & (1 << bit);
		const int hi = minor & (1 << mirror);

		minor &= ~(lo | hi);			/* clear both bits */
		minor |= (lo << span) | (hi >> span);	/* set them swapped */
	}
#endif
	return minor;
}

329
int blk_alloc_ext_minor(void)
T
Tejun Heo 已提交
330
{
T
Tejun Heo 已提交
331
	int idx;
T
Tejun Heo 已提交
332

333
	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
334 335 336 337 338 339
	if (idx < 0) {
		if (idx == -ENOSPC)
			return -EBUSY;
		return idx;
	}
	return blk_mangle_minor(idx);
T
Tejun Heo 已提交
340 341
}

342
void blk_free_ext_minor(unsigned int minor)
T
Tejun Heo 已提交
343
{
344
	ida_free(&ext_devt_ida, blk_mangle_minor(minor));
Y
Yufen Yu 已提交
345 346
}

347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Format @devt in hex into @buf (at least BDEVT_SIZE bytes) and return @buf.
 * When both major and minor fit in 8 bits the compact "MMmm" form is used,
 * left-justified and padded to 9 characters; otherwise "MMM:mmmmm" is used.
 */
static char *bdevt_str(dev_t devt, char *buf)
{
	const unsigned int ma = MAJOR(devt);
	const unsigned int mi = MINOR(devt);

	if (ma > 0xff || mi > 0xff) {
		snprintf(buf, BDEVT_SIZE, "%03x:%05x", ma, mi);
	} else {
		char tbuf[BDEVT_SIZE];

		snprintf(tbuf, BDEVT_SIZE, "%02x%02x", ma, mi);
		snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
	}

	return buf;
}

359 360 361
/*
 * Send the uevent @action for every block device in @disk's partition table,
 * skipping partitions of size zero and devices that cannot be grabbed.
 *
 * The table is walked under rcu_read_lock(); the lock is dropped around each
 * kobject_uevent() call while a reference taken with bdgrab() is held.
 */
void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
	struct block_device *bdev;
	unsigned long index;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, index, bdev) {
		bool empty_part = bdev_is_partition(bdev) &&
				  !bdev_nr_sectors(bdev);

		if (empty_part || !bdgrab(bdev))
			continue;

		rcu_read_unlock();
		kobject_uevent(bdev_kobj(bdev), action);
		bdput(bdev);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(disk_uevent);

380 381 382 383 384 385 386 387 388 389 390 391 392
/*
 * Request a partition scan of @disk: set GD_NEED_PART_SCAN and then open and
 * immediately close the whole device.  Does nothing for disks with zero
 * capacity or with partition scanning disabled.
 */
static void disk_scan_partitions(struct gendisk *disk)
{
	struct block_device *bdev;

	if (get_capacity(disk) == 0)
		return;
	if (!disk_part_scan_enabled(disk))
		return;

	set_bit(GD_NEED_PART_SCAN, &disk->state);
	bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
	if (IS_ERR(bdev))
		return;
	blkdev_put(bdev, FMODE_READ);
}

393 394
static void register_disk(struct device *parent, struct gendisk *disk,
			  const struct attribute_group **groups)
395 396 397 398
{
	struct device *ddev = disk_to_dev(disk);
	int err;

399
	ddev->parent = parent;
400

401
	dev_set_name(ddev, "%s", disk->disk_name);
402 403 404 405

	/* delay uevents, until we scanned partition table */
	dev_set_uevent_suppress(ddev, 1);

406 407 408 409
	if (groups) {
		WARN_ON(ddev->groups);
		ddev->groups = groups;
	}
410 411 412 413 414 415 416 417 418 419
	if (device_add(ddev))
		return;
	if (!sysfs_deprecated) {
		err = sysfs_create_link(block_depr, &ddev->kobj,
					kobject_name(&ddev->kobj));
		if (err) {
			device_del(ddev);
			return;
		}
	}
420 421 422 423 424 425 426 427

	/*
	 * avoid probable deadlock caused by allocating memory with
	 * GFP_KERNEL in runtime_resume callback of its all ancestor
	 * devices
	 */
	pm_runtime_set_memalloc_noio(ddev, true);

428 429
	disk->part0->bd_holder_dir =
		kobject_create_and_add("holders", &ddev->kobj);
430 431
	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);

432
	if (disk->flags & GENHD_FL_HIDDEN)
433 434
		return;

435
	disk_scan_partitions(disk);
436

437
	/* announce the disk and partitions after all partitions are created */
438
	dev_set_uevent_suppress(ddev, 0);
439
	disk_uevent(disk, KOBJ_ADD);
440

441 442 443 444 445 446
	if (disk->queue->backing_dev_info->dev) {
		err = sysfs_create_link(&ddev->kobj,
			  &disk->queue->backing_dev_info->dev->kobj,
			  "bdi");
		WARN_ON(err);
	}
447 448
}

L
Linus Torvalds 已提交
449
/**
450
 * __device_add_disk - add disk information to kernel list
451
 * @parent: parent device for the disk
L
Linus Torvalds 已提交
452
 * @disk: per-device partitioning information
453
 * @groups: Additional per-device sysfs groups
454
 * @register_queue: register the queue if set to true
L
Linus Torvalds 已提交
455 456 457
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
458 459
 *
 * FIXME: error handling
L
Linus Torvalds 已提交
460
 */
461
static void __device_add_disk(struct device *parent, struct gendisk *disk,
462
			      const struct attribute_group **groups,
463
			      bool register_queue)
L
Linus Torvalds 已提交
464
{
465
	int ret;
466

467 468 469 470 471 472 473 474 475
	/*
	 * The disk queue should now be all set with enough information about
	 * the device for the elevator code to pick an adequate default
	 * elevator if one is needed, that is, for devices requesting queue
	 * registration.
	 */
	if (register_queue)
		elevator_init_mq(disk->queue);

476 477 478 479 480 481
	/*
	 * If the driver provides an explicit major number it also must provide
	 * the number of minors numbers supported, and those will be used to
	 * setup the gendisk.
	 * Otherwise just allocate the device numbers for both the whole device
	 * and all partitions from the extended dev_t space.
482
	 */
483 484
	if (disk->major) {
		WARN_ON(!disk->minors);
485 486 487 488 489 490

		if (disk->minors > DISK_MAX_PARTS) {
			pr_err("block: can't allocate more than %d partitions\n",
				DISK_MAX_PARTS);
			disk->minors = DISK_MAX_PARTS;
		}
491 492
	} else {
		WARN_ON(disk->minors);
493

494 495 496 497 498 499 500
		ret = blk_alloc_ext_minor();
		if (ret < 0) {
			WARN_ON(1);
			return;
		}
		disk->major = BLOCK_EXT_MAJOR;
		disk->first_minor = MINOR(ret);
501
		disk->flags |= GENHD_FL_EXT_DEVT;
502
	}
503 504

	disk->flags |= GENHD_FL_UP;
505

506 507
	disk_alloc_events(disk);

508 509 510 511 512 513 514 515
	if (disk->flags & GENHD_FL_HIDDEN) {
		/*
		 * Don't let hidden disks show up in /proc/partitions,
		 * and don't bother scanning for partitions either.
		 */
		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
		disk->flags |= GENHD_FL_NO_PART_SCAN;
	} else {
516 517
		struct backing_dev_info *bdi = disk->queue->backing_dev_info;
		struct device *dev = disk_to_dev(disk);
518

519
		/* Register BDI before referencing it from bdev */
520 521 522
		dev->devt = MKDEV(disk->major, disk->first_minor);
		ret = bdi_register(bdi, "%u:%u",
				   disk->major, disk->first_minor);
523
		WARN_ON(ret);
524
		bdi_set_owner(bdi, dev);
525
		bdev_add(disk->part0, dev->devt);
526
	}
527
	register_disk(parent, disk, groups);
528 529
	if (register_queue)
		blk_register_queue(disk);
530

531 532 533 534
	/*
	 * Take an extra ref on queue which will be put on disk_release()
	 * so that it sticks around as long as @disk is there.
	 */
535 536 537 538
	if (blk_get_queue(disk->queue))
		set_bit(GD_QUEUE_REF, &disk->state);
	else
		WARN_ON_ONCE(1);
539

540
	disk_add_events(disk);
541
	blk_integrity_add(disk);
L
Linus Torvalds 已提交
542
}
543

544 545 546
void device_add_disk(struct device *parent, struct gendisk *disk,
		     const struct attribute_group **groups)

547
{
548
	__device_add_disk(parent, disk, groups, true);
549
}
550
EXPORT_SYMBOL(device_add_disk);
L
Linus Torvalds 已提交
551

552 553
void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
{
554
	__device_add_disk(parent, disk, NULL, false);
555 556 557
}
EXPORT_SYMBOL(device_add_disk_no_queue_reg);

558 559 560 561 562 563 564 565 566 567 568 569 570
/**
 * del_gendisk - remove the gendisk
 * @disk: the struct gendisk to remove
 *
 * Removes the gendisk and all its associated resources. This deletes the
 * partitions associated with the gendisk, and unregisters the associated
 * request_queue.
 *
 * This is the counter to the respective __device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * __device_add_disk() was used.
571 572 573 574 575
 *
 * Drivers exist which depend on the release of the gendisk to be synchronous,
 * it should not be deferred.
 *
 * Context: can sleep
576
 */
577
void del_gendisk(struct gendisk *disk)
L
Linus Torvalds 已提交
578
{
579 580
	might_sleep();

581 582 583
	if (WARN_ON_ONCE(!disk->queue))
		return;

584
	blk_integrity_del(disk);
585 586
	disk_del_events(disk);

587
	mutex_lock(&disk->open_mutex);
588
	disk->flags &= ~GENHD_FL_UP;
589
	blk_drop_partitions(disk);
590
	mutex_unlock(&disk->open_mutex);
591

592 593 594 595 596 597 598 599 600
	fsync_bdev(disk->part0);
	__invalidate_device(disk->part0, true);

	/*
	 * Unhash the bdev inode for this device so that it can't be looked
	 * up any more even if openers still hold references to it.
	 */
	remove_inode_hash(disk->part0->bd_inode);

601 602
	set_capacity(disk, 0);

603
	if (!(disk->flags & GENHD_FL_HIDDEN)) {
604
		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
605

606 607 608 609
		/*
		 * Unregister bdi before releasing device numbers (as they can
		 * get reused and we'd get clashes in sysfs).
		 */
610
		bdi_unregister(disk->queue->backing_dev_info);
611
	}
612

613
	blk_unregister_queue(disk);
614

615
	kobject_put(disk->part0->bd_holder_dir);
616 617
	kobject_put(disk->slave_dir);

618
	part_stat_set_all(disk->part0, 0);
619
	disk->part0->bd_stamp = 0;
620 621
	if (!sysfs_deprecated)
		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
622
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
623
	device_del(disk_to_dev(disk));
L
Linus Torvalds 已提交
624
}
625
EXPORT_SYMBOL(del_gendisk);
L
Linus Torvalds 已提交
626

627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651
/* sysfs access to bad-blocks list. */

/*
 * Show handler for the "badblocks" attribute.  Emits a single newline when
 * the disk has no badblocks table, otherwise defers to badblocks_show().
 */
static ssize_t disk_badblocks_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->bb)
		return sysfs_emit(page, "\n");

	return badblocks_show(disk->bb, page, 0);
}

/*
 * Store handler for the "badblocks" attribute.  Returns -ENXIO when the disk
 * has no badblocks table, otherwise the result of badblocks_store().
 */
static ssize_t disk_badblocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *page, size_t len)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->bb)
		return badblocks_store(disk->bb, page, len, 0);

	return -ENXIO;
}

652
/*
 * Try to bring up the driver for @devt.  If an entry registered for the major
 * of @devt has a probe callback, that callback is invoked (with
 * major_names_lock held) and nothing else is done.  Otherwise a module is
 * requested by its "block-major-<major>-<minor>" alias, falling back to
 * "block-major-<major>" when the first request returns a positive value.
 */
void blk_request_module(dev_t devt)
{
	const unsigned int major = MAJOR(devt);
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	for (entry = major_names[major_to_index(major)]; entry;
	     entry = entry->next) {
		if (entry->major != major || !entry->probe)
			continue;

		entry->probe(devt);
		mutex_unlock(&major_names_lock);
		return;
	}
	mutex_unlock(&major_names_lock);

	if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
		/* Make old-style 2.4 aliases work */
		request_module("block-major-%d", MAJOR(devt));
}

672 673 674 675 676 677 678
/*
 * print a full list of all partitions - intended for places where the root
 * filesystem can't be mounted and thus to give the victim some idea of what
 * went wrong
 */
void __init printk_all_partitions(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
		struct gendisk *disk = dev_to_disk(dev);
		struct block_device *part;
		char name_buf[BDEVNAME_SIZE];
		char devt_buf[BDEVT_SIZE];
		unsigned long idx;

		/*
		 * Don't show empty devices or things that have been
		 * suppressed
		 */
		if (get_capacity(disk) == 0 ||
		    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
			continue;

		/*
		 * Note, unlike /proc/partitions, I am showing the numbers in
		 * hex - the same format as the root= option takes.
		 */
		rcu_read_lock();
		xa_for_each(&disk->part_tbl, idx, part) {
			if (!bdev_nr_sectors(part))
				continue;
			printk("%s%s %10llu %s %s",
			       bdev_is_partition(part) ? "  " : "",
			       bdevt_str(part->bd_dev, devt_buf),
			       bdev_nr_sectors(part) >> 1,
			       disk_name(disk, part->bd_partno, name_buf),
			       part->bd_meta_info ?
					part->bd_meta_info->uuid : "");
			/*
			 * The tail of the line must be marked as a
			 * continuation, otherwise printk starts a new log
			 * record and the entry is split across two lines.
			 */
			if (bdev_is_partition(part))
				pr_cont("\n");
			else if (dev->parent && dev->parent->driver)
				pr_cont(" driver: %s\n",
					dev->parent->driver->name);
			else
				pr_cont(" (driver?)\n");
		}
		rcu_read_unlock();
	}
	class_dev_iter_exit(&iter);
}

L
Linus Torvalds 已提交
726 727
#ifdef CONFIG_PROC_FS
/* iterator */
728
/*
 * seq_file ->start: allocate a class iterator over all disks, store it in
 * seqf->private and advance it to position *pos.
 *
 * Returns the gendisk at that position, NULL when the position is past the
 * last disk, or ERR_PTR(-ENOMEM) when the iterator cannot be allocated.
 * The iterator is left in seqf->private for disk_seqf_stop() to release.
 */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	loff_t remaining = *pos;
	struct class_dev_iter *iter;
	struct device *dev;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
	for (;;) {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
		if (!remaining--)
			break;
	}

	return dev_to_disk(dev);
}

749
/*
 * seq_file ->next: bump *pos and return the next gendisk from the iterator
 * kept in seqf->private, or NULL when the iteration is exhausted.
 */
static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *next_dev;

	(*pos)++;
	next_dev = class_dev_iter_next(seqf->private);
	if (!next_dev)
		return NULL;

	return dev_to_disk(next_dev);
}

761
static void disk_seqf_stop(struct seq_file *seqf, void *v)
762
{
763
	struct class_dev_iter *iter = seqf->private;
764

765 766 767 768
	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
769
		seqf->private = NULL;
770
	}
L
Linus Torvalds 已提交
771 772
}

773
/*
 * ->start for the partitions seq_file: same as disk_seqf_start(), but also
 * prints the column header when iteration starts at position 0 and a disk
 * was found.
 */
static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *first = disk_seqf_start(seqf, pos);

	if (IS_ERR_OR_NULL(first))
		return first;

	if (*pos == 0)
		seq_puts(seqf, "major minor  #blocks  name\n\n");
	return first;
}

783
static int show_partition(struct seq_file *seqf, void *v)
L
Linus Torvalds 已提交
784 785
{
	struct gendisk *sgp = v;
786
	struct block_device *part;
787
	unsigned long idx;
L
Linus Torvalds 已提交
788 789 790
	char buf[BDEVNAME_SIZE];

	/* Don't show non-partitionable removeable devices or empty devices */
T
Tejun Heo 已提交
791
	if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
792
				   (sgp->flags & GENHD_FL_REMOVABLE)))
L
Linus Torvalds 已提交
793 794 795 796
		return 0;
	if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
		return 0;

797 798 799 800
	rcu_read_lock();
	xa_for_each(&sgp->part_tbl, idx, part) {
		if (!bdev_nr_sectors(part))
			continue;
801
		seq_printf(seqf, "%4d  %7d %10llu %s\n",
802 803 804
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1,
			   disk_name(sgp, part->bd_partno, buf));
805 806
	}
	rcu_read_unlock();
L
Linus Torvalds 已提交
807 808 809
	return 0;
}

810
static const struct seq_operations partitions_op = {
811 812 813
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
814
	.show	= show_partition
L
Linus Torvalds 已提交
815 816 817 818 819
};
#endif

/*
 * Subsystem init: register the block class, run blk_dev_init(), register
 * the "blkext" major and, unless sysfs_deprecated is set, create the
 * top-level "block" kobject.  Only a class_register() failure is reported
 * to the caller.
 */
static int __init genhd_device_init(void)
{
	int err;

	block_class.dev_kobj = sysfs_dev_block_kobj;
	err = class_register(&block_class);
	if (unlikely(err))
		return err;
	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	if (sysfs_deprecated)
		return 0;

	block_depr = kobject_create_and_add("block", NULL);
	return 0;
}

subsys_initcall(genhd_device_init);

838 839
static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
L
Linus Torvalds 已提交
840
{
841
	struct gendisk *disk = dev_to_disk(dev);
L
Linus Torvalds 已提交
842

843
	return sprintf(buf, "%d\n", disk->minors);
L
Linus Torvalds 已提交
844 845
}

846 847 848 849 850
/* sysfs "ext_range" show handler: prints disk_max_parts() of the disk. */
static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", disk_max_parts(disk));
}

854 855
static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
856
{
857
	struct gendisk *disk = dev_to_disk(dev);
858

859 860
	return sprintf(buf, "%d\n",
		       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
861 862
}

863 864 865 866 867 868 869 870 871
/* sysfs "hidden" show handler: 1 if GENHD_FL_HIDDEN is set, else 0. */
static ssize_t disk_hidden_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n",
			  (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
}

K
Kay Sievers 已提交
872 873 874 875 876
/* sysfs "ro" show handler: 1 if get_disk_ro() reports read-only, else 0. */
static ssize_t disk_ro_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}

880 881 882
/* sysfs "size" show handler: prints bdev_nr_sectors() of the block device. */
ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
}

ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
889 890
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
891
	struct disk_stats stat;
892 893
	unsigned int inflight;

894
	part_stat_read_all(bdev, &stat);
895
	if (queue_is_mq(q))
896
		inflight = blk_mq_in_flight(q, bdev);
897
	else
898
		inflight = part_in_flight(bdev);
899

900 901 902 903 904 905 906
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
907 908 909 910 911 912 913 914
		stat.ios[STAT_READ],
		stat.merges[STAT_READ],
		(unsigned long long)stat.sectors[STAT_READ],
		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
		stat.ios[STAT_WRITE],
		stat.merges[STAT_WRITE],
		(unsigned long long)stat.sectors[STAT_WRITE],
		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
915
		inflight,
916
		jiffies_to_msecs(stat.io_ticks),
917 918 919 920 921
		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
				      stat.nsecs[STAT_WRITE] +
				      stat.nsecs[STAT_DISCARD] +
				      stat.nsecs[STAT_FLUSH],
						NSEC_PER_MSEC),
922 923 924 925 926 927
		stat.ios[STAT_DISCARD],
		stat.merges[STAT_DISCARD],
		(unsigned long long)stat.sectors[STAT_DISCARD],
		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
		stat.ios[STAT_FLUSH],
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
928 929 930 931 932
}

ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
933 934
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev->bd_disk->queue;
935 936
	unsigned int inflight[2];

937
	if (queue_is_mq(q))
938
		blk_mq_in_flight_rw(q, bdev, inflight);
939
	else
940
		part_in_flight_rw(bdev, inflight);
941

942 943 944
	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}

945 946
static ssize_t disk_capability_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
947
{
948 949 950
	struct gendisk *disk = dev_to_disk(dev);

	return sprintf(buf, "%x\n", disk->flags);
951
}
952

953 954 955 956 957 958 959 960 961
/* sysfs "alignment_offset" show handler: prints queue_alignment_offset(). */
static ssize_t disk_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", queue_alignment_offset(disk->queue));
}

962 963 964 965 966 967
static ssize_t disk_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

968
	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
969 970
}

971 972 973 974 975 976 977 978 979 980 981 982
/*
 * Per-disk sysfs attributes.  All are read-only (0444) except "badblocks",
 * which is 0644 and also has a store handler.
 */
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
983

984
#ifdef CONFIG_FAIL_MAKE_REQUEST
985 986 987
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
988
	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
989 990 991 992 993 994 995 996 997
}

/*
 * sysfs "make-it-fail" store handler: if @buf starts with a decimal integer,
 * store it in bd_make_it_fail.  Input that does not parse is ignored; @count
 * is returned in every case.
 */
ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int val;

	if (count == 0)
		return count;

	if (sscanf(buf, "%d", &val) > 0)
		dev_to_bdev(dev)->bd_make_it_fail = val;

	return count;
}

1003
static struct device_attribute dev_attr_fail =
1004
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
1005 1006
#endif /* CONFIG_FAIL_MAKE_REQUEST */

1007 1008
#ifdef CONFIG_FAIL_IO_TIMEOUT
/* "io-timeout-fail" attribute, only built with CONFIG_FAIL_IO_TIMEOUT */
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif /* CONFIG_FAIL_IO_TIMEOUT */
1011 1012 1013

static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
1014
	&dev_attr_ext_range.attr,
1015
	&dev_attr_removable.attr,
1016
	&dev_attr_hidden.attr,
K
Kay Sievers 已提交
1017
	&dev_attr_ro.attr,
1018
	&dev_attr_size.attr,
1019
	&dev_attr_alignment_offset.attr,
1020
	&dev_attr_discard_alignment.attr,
1021 1022
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
1023
	&dev_attr_inflight.attr,
1024
	&dev_attr_badblocks.attr,
1025 1026 1027
	&dev_attr_events.attr,
	&dev_attr_events_async.attr,
	&dev_attr_events_poll_msecs.attr,
1028 1029
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
1030 1031 1032
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
1033 1034 1035 1036
#endif
	NULL
};

1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct gendisk *disk = dev_to_disk(dev);

	if (a == &dev_attr_badblocks.attr && !disk->bb)
		return 0;
	return a->mode;
}

1047 1048
static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
1049
	.is_visible = disk_visible,
1050 1051
};

1052
static const struct attribute_group *disk_attr_groups[] = {
1053 1054
	&disk_attr_group,
	NULL
L
Linus Torvalds 已提交
1055 1056
};

1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
/**
 * disk_release - releases all allocated resources of the gendisk
 * @dev: the device representing this disk
 *
 * This function releases all allocated resources of the gendisk.
 *
 * Drivers which used __device_add_disk() have a gendisk with a request_queue
 * assigned. Since the request_queue sits on top of the gendisk for these
 * drivers we also call blk_put_queue() for them, and we expect the
 * request_queue refcount to reach 0 at this point, and so the request_queue
 * will also be freed prior to the disk.
1068 1069
 *
 * Context: can sleep
1070
 */
1071
static void disk_release(struct device *dev)
L
Linus Torvalds 已提交
1072
{
1073 1074
	struct gendisk *disk = dev_to_disk(dev);

1075 1076
	might_sleep();

1077 1078
	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(MINOR(dev->devt));
1079
	disk_release_events(disk);
L
Linus Torvalds 已提交
1080
	kfree(disk->random);
1081
	xa_destroy(&disk->part_tbl);
1082
	bdput(disk->part0);
1083
	if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
1084
		blk_put_queue(disk->queue);
L
Linus Torvalds 已提交
1085 1086
	kfree(disk);
}
1087 1088
/* The "block" device class; only its name is set explicitly here. */
struct class block_class = {
	.name = "block",
};

1091
static char *block_devnode(struct device *dev, umode_t *mode,
1092
			   kuid_t *uid, kgid_t *gid)
1093 1094 1095
{
	struct gendisk *disk = dev_to_disk(dev);

1096 1097
	if (disk->fops->devnode)
		return disk->fops->devnode(disk, mode);
1098 1099 1100
	return NULL;
}

1101
const struct device_type disk_type = {
1102 1103 1104
	.name		= "disk",
	.groups		= disk_attr_groups,
	.release	= disk_release,
1105
	.devnode	= block_devnode,
L
Linus Torvalds 已提交
1106 1107
};

1108
#ifdef CONFIG_PROC_FS
1109 1110 1111 1112 1113 1114 1115 1116
/*
 * aggregate disk stat collector.  Uses the same stats that the sysfs
 * entries do, above, but makes them available through one seq_file.
 *
 * The output looks suspiciously like /proc/partitions with a bunch of
 * extra fields.
 */
static int diskstats_show(struct seq_file *seqf, void *v)
L
Linus Torvalds 已提交
1117 1118
{
	struct gendisk *gp = v;
1119
	struct block_device *hd;
L
Linus Torvalds 已提交
1120
	char buf[BDEVNAME_SIZE];
1121
	unsigned int inflight;
1122
	struct disk_stats stat;
1123
	unsigned long idx;
L
Linus Torvalds 已提交
1124 1125

	/*
1126
	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1127
		seq_puts(seqf,	"major minor name"
L
Linus Torvalds 已提交
1128 1129 1130 1131
				"     rio rmerge rsect ruse wio wmerge "
				"wsect wuse running use aveq"
				"\n\n");
	*/
1132

1133 1134 1135 1136
	rcu_read_lock();
	xa_for_each(&gp->part_tbl, idx, hd) {
		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
			continue;
1137
		part_stat_read_all(hd, &stat);
1138
		if (queue_is_mq(gp->queue))
1139
			inflight = blk_mq_in_flight(gp->queue, hd);
1140
		else
1141
			inflight = part_in_flight(hd);
1142

1143 1144 1145 1146
		seq_printf(seqf, "%4d %7d %s "
			   "%lu %lu %lu %u "
			   "%lu %lu %lu %u "
			   "%u %u %u "
1147 1148 1149
			   "%lu %lu %lu %u "
			   "%lu %u"
			   "\n",
1150 1151
			   MAJOR(hd->bd_dev), MINOR(hd->bd_dev),
			   disk_name(gp, hd->bd_partno, buf),
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161
			   stat.ios[STAT_READ],
			   stat.merges[STAT_READ],
			   stat.sectors[STAT_READ],
			   (unsigned int)div_u64(stat.nsecs[STAT_READ],
							NSEC_PER_MSEC),
			   stat.ios[STAT_WRITE],
			   stat.merges[STAT_WRITE],
			   stat.sectors[STAT_WRITE],
			   (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
							NSEC_PER_MSEC),
1162
			   inflight,
1163
			   jiffies_to_msecs(stat.io_ticks),
1164 1165 1166 1167 1168
			   (unsigned int)div_u64(stat.nsecs[STAT_READ] +
						 stat.nsecs[STAT_WRITE] +
						 stat.nsecs[STAT_DISCARD] +
						 stat.nsecs[STAT_FLUSH],
							NSEC_PER_MSEC),
1169 1170 1171 1172 1173 1174 1175 1176
			   stat.ios[STAT_DISCARD],
			   stat.merges[STAT_DISCARD],
			   stat.sectors[STAT_DISCARD],
			   (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
						 NSEC_PER_MSEC),
			   stat.ios[STAT_FLUSH],
			   (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
						 NSEC_PER_MSEC)
1177
			);
L
Linus Torvalds 已提交
1178
	}
1179
	rcu_read_unlock();
1180

L
Linus Torvalds 已提交
1181 1182 1183
	return 0;
}

1184
static const struct seq_operations diskstats_op = {
1185 1186 1187
	.start	= disk_seqf_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
L
Linus Torvalds 已提交
1188 1189
	.show	= diskstats_show
};
1190 1191 1192

/*
 * Register the "diskstats" and "partitions" proc entries.  The results of
 * proc_create_seq() are not checked; the function always returns 0.
 */
static int __init proc_genhd_init(void)
{
	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
	proc_create_seq("partitions", 0, NULL, &partitions_op);

	return 0;
}
module_init(proc_genhd_init);
1198
#endif /* CONFIG_PROC_FS */
L
Linus Torvalds 已提交
1199

1200 1201
/*
 * Look up partition @partno in @disk's partition table and return its device
 * number, or 0 if no such entry exists.  The lookup runs under
 * rcu_read_lock().
 */
dev_t part_devt(struct gendisk *disk, u8 partno)
{
	struct block_device *bdev;
	dev_t devt;

	rcu_read_lock();
	bdev = xa_load(&disk->part_tbl, partno);
	devt = bdev ? bdev->bd_dev : 0;
	rcu_read_unlock();

	return devt;
}

1214
dev_t blk_lookup_devt(const char *name, int partno)
1215
{
1216 1217 1218
	dev_t devt = MKDEV(0, 0);
	struct class_dev_iter iter;
	struct device *dev;
1219

1220 1221
	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
	while ((dev = class_dev_iter_next(&iter))) {
1222 1223
		struct gendisk *disk = dev_to_disk(dev);

1224
		if (strcmp(dev_name(dev), name))
1225 1226
			continue;

1227 1228 1229 1230 1231 1232
		if (partno < disk->minors) {
			/* We need to return the right devno, even
			 * if the partition doesn't exist yet.
			 */
			devt = MKDEV(MAJOR(dev->devt),
				     MINOR(dev->devt) + partno);
1233 1234 1235 1236
		} else {
			devt = part_devt(disk, partno);
			if (devt)
				break;
1237
		}
1238
	}
1239
	class_dev_iter_exit(&iter);
1240 1241 1242
	return devt;
}

1243
struct gendisk *__alloc_disk_node(int minors, int node_id)
1244 1245 1246
{
	struct gendisk *disk;

1247
	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1248 1249
	if (!disk)
		return NULL;
1250

1251 1252
	disk->part0 = bdev_alloc(disk, 0);
	if (!disk->part0)
1253 1254
		goto out_free_disk;

1255
	disk->node_id = node_id;
1256
	mutex_init(&disk->open_mutex);
1257 1258 1259
	xa_init(&disk->part_tbl);
	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
		goto out_destroy_part_tbl;
1260 1261 1262 1263 1264 1265

	disk->minors = minors;
	rand_initialize_disk(disk);
	disk_to_dev(disk)->class = &block_class;
	disk_to_dev(disk)->type = &disk_type;
	device_initialize(disk_to_dev(disk));
L
Linus Torvalds 已提交
1266
	return disk;
1267

1268 1269
out_destroy_part_tbl:
	xa_destroy(&disk->part_tbl);
1270
	bdput(disk->part0);
1271 1272 1273
out_free_disk:
	kfree(disk);
	return NULL;
L
Linus Torvalds 已提交
1274
}
1275
EXPORT_SYMBOL(__alloc_disk_node);
L
Linus Torvalds 已提交
1276

1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295
/*
 * Allocate a request_queue and a gendisk (with 0 minors) on @node and attach
 * the queue to the disk.  If the disk allocation fails the queue is cleaned
 * up again.  Returns the disk, or NULL on failure.
 */
struct gendisk *__blk_alloc_disk(int node)
{
	struct request_queue *q = blk_alloc_queue(node);
	struct gendisk *disk;

	if (!q)
		return NULL;

	disk = __alloc_disk_node(0, node);
	if (disk) {
		disk->queue = q;
		return disk;
	}

	blk_cleanup_queue(q);
	return NULL;
}
EXPORT_SYMBOL(__blk_alloc_disk);

1296 1297
/**
 * put_disk - drop a reference on a gendisk
 * @disk: the struct gendisk whose refcount is decremented; may be NULL
 *
 * Decrements the refcount of the struct gendisk. Once it reaches 0,
 * disk_release() is called. A NULL @disk is ignored.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);

1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
/**
 * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
 * @disk: gendisk to shutdown
 *
 * Mark the queue attached to @disk DYING, drain all pending requests, then
 * mark the queue DEAD, destroy and put it, and finally put the gendisk
 * structure itself.
 *
 * Context: can sleep
 */
void blk_cleanup_disk(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/* The queue must be torn down before the disk reference is dropped. */
	blk_cleanup_queue(q);
	put_disk(disk);
}
EXPORT_SYMBOL(blk_cleanup_disk);

1329 1330 1331 1332 1333 1334 1335 1336 1337 1338
/*
 * Send a KOBJ_CHANGE uevent for @gd carrying "DISK_RO=1" when @ro is
 * non-zero and "DISK_RO=0" otherwise.
 */
static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
	char event[] = "DISK_RO=1";
	char *envp[] = { event, NULL };

	/* The flag digit is the last character before the terminating NUL. */
	event[sizeof(event) - 2] = ro ? '1' : '0';

	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
}

1339 1340 1341
/**
 * set_disk_ro - set a gendisk read-only
 * @disk:	gendisk to operate on
 * @read_only:	%true to set the disk read-only, %false set the disk read/write
 *
 * Indicates whether a given disk device should have its read-only flag set.
 * set_disk_ro() is typically used by device drivers to indicate whether the
 * underlying physical device is write-protected.
 *
 * A uevent is only sent when the flag actually changes.
 */
void set_disk_ro(struct gendisk *disk, bool read_only)
{
	bool was_read_only;

	if (read_only)
		was_read_only = test_and_set_bit(GD_READ_ONLY, &disk->state);
	else
		was_read_only = test_and_clear_bit(GD_READ_ONLY, &disk->state);

	/* State unchanged: nothing to announce. */
	if (was_read_only == read_only)
		return;

	set_disk_ro_uevent(disk, read_only);
}
EXPORT_SYMBOL(set_disk_ro);

int bdev_read_only(struct block_device *bdev)
{
1363
	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
L
Linus Torvalds 已提交
1364 1365
}
EXPORT_SYMBOL(bdev_read_only);