提交 253d38f9 编写于 作者: H Hou Tao 提交者: Yang Yingliang

blk: reuse lookup_sem to serialize partition operations

hulk inclusion
category: bugfix
bugzilla: 55097
CVE: NA

-------------------------------------------------

Now there is no protection between partition operations (e.g.,
partition rescan) and delete_partition() in del_gendisk(), so the
following scenario is possible:

CPU 1                          CPU 2

blkdev_ioctl                   del_gendisk
  blkdev_reread_part
    lock bd_mutex
      drop_partitions
        check_partition
                                 lock lookup_sem
				 // for each partition
				 delete_partition

        // for each partition
        add_partition

The newly added partitions, the device files (e.g, /dev/sdXN)
and the symlinks in /sys/class/block will be left behind. If
the deleted disk is online again, the scan of partition will
fail with the following error:

  sysfs: cannot create duplicate filename '/class/block/sdaN'
  sdX: pN could not be added: 17

The vanilla kernel tries to fix that with commit c76f48eb
("block: take bd_mutex around delete_partitions in del_gendisk"),
but that introduces a deadlock for the nbd/loop/xen-blkfront drivers.
These in-tree drivers can be fixed, but there may be other affected
block drivers, especially out-of-tree ones, so fix it in
another way.

Two methods were considered. The first is to make del_gendisk() wait
for in-flight partition operations to finish. That works, but it needs
new fields in gendisk (a bool and a wait_queue_head_t). The second
is to reuse lookup_sem and GENHD_FL_UP to serialize partition operations
against del_gendisk(). The latter is chosen, and here are the details.

There are six partition operations:

(1) add_partition() in blkpg_ioctl()
(2) delete_partition() in blkpg_ioctl()
(3) resize in blkpg_ioctl()
(4) partition rescan in __blkdev_reread_part()
(5) partition revalidate in bdev_disk_changed()
(6) delete_partition() in del_gendisk()

Ops (1)~(5) already take bd_mutex, so using down_read() to
serialize against the down_write() in del_gendisk() is OK. Op (3)
only updates the values in hd_struct, so no lock is needed,
because it has already taken a reference on the hd_struct.

lookup_sem is used to prevent a newly-created block device inode from
associating with a gendisk that is being deleted, and the locking order is:

  part->bd_mutex -> disk->lookup_sem
    or
  whole->bd_mutex -> disk->lookup_sem

Now it is also used to serialize the partition operations and the new
locking order will be:

  part->bd_mutex -> whole->bd_mutex -> disk->lookup_sem

and it is OK.
Signed-off-by: Hou Tao <houtao1@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 cfbc280a
...@@ -64,9 +64,16 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user ...@@ -64,9 +64,16 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
} }
disk_part_iter_exit(&piter); disk_part_iter_exit(&piter);
down_read(&disk->lookup_sem);
if (!(disk->flags & GENHD_FL_UP)) {
up_read(&disk->lookup_sem);
mutex_unlock(&bdev->bd_mutex);
return -ENXIO;
}
/* all seems OK */ /* all seems OK */
part = add_partition(disk, partno, start, length, part = add_partition(disk, partno, start, length,
ADDPART_FLAG_NONE, NULL); ADDPART_FLAG_NONE, NULL);
up_read(&disk->lookup_sem);
mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdev->bd_mutex);
return PTR_ERR_OR_ZERO(part); return PTR_ERR_OR_ZERO(part);
case BLKPG_DEL_PARTITION: case BLKPG_DEL_PARTITION:
...@@ -90,7 +97,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user ...@@ -90,7 +97,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
invalidate_bdev(bdevp); invalidate_bdev(bdevp);
mutex_lock_nested(&bdev->bd_mutex, 1); mutex_lock_nested(&bdev->bd_mutex, 1);
down_read(&disk->lookup_sem);
delete_partition(disk, partno); delete_partition(disk, partno);
up_read(&disk->lookup_sem);
mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&bdevp->bd_mutex); mutex_unlock(&bdevp->bd_mutex);
bdput(bdevp); bdput(bdevp);
...@@ -162,6 +171,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user ...@@ -162,6 +171,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
int __blkdev_reread_part(struct block_device *bdev) int __blkdev_reread_part(struct block_device *bdev)
{ {
struct gendisk *disk = bdev->bd_disk; struct gendisk *disk = bdev->bd_disk;
int err;
if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
return -EINVAL; return -EINVAL;
...@@ -170,7 +180,14 @@ int __blkdev_reread_part(struct block_device *bdev) ...@@ -170,7 +180,14 @@ int __blkdev_reread_part(struct block_device *bdev)
lockdep_assert_held(&bdev->bd_mutex); lockdep_assert_held(&bdev->bd_mutex);
return rescan_partitions(disk, bdev); down_read(&disk->lookup_sem);
if (disk->flags & GENHD_FL_UP)
err = rescan_partitions(disk, bdev);
else
err = -ENXIO;
up_read(&disk->lookup_sem);
return err;
} }
EXPORT_SYMBOL(__blkdev_reread_part); EXPORT_SYMBOL(__blkdev_reread_part);
......
...@@ -1499,11 +1499,20 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); ...@@ -1499,11 +1499,20 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static void bdev_disk_changed(struct block_device *bdev, bool invalidate) static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
{ {
if (disk_part_scan_enabled(bdev->bd_disk)) { struct gendisk *disk = bdev->bd_disk;
if (disk_part_scan_enabled(disk)) {
down_read(&disk->lookup_sem);
if (!(disk->flags & GENHD_FL_UP)) {
up_read(&disk->lookup_sem);
return;
}
if (invalidate) if (invalidate)
invalidate_partitions(bdev->bd_disk, bdev); invalidate_partitions(bdev->bd_disk, bdev);
else else
rescan_partitions(bdev->bd_disk, bdev); rescan_partitions(bdev->bd_disk, bdev);
up_read(&disk->lookup_sem);
} else { } else {
check_disk_size_change(bdev->bd_disk, bdev, !invalidate); check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
bdev->bd_invalidated = 0; bdev->bd_invalidated = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册