提交 b87570f5 编写于 作者: M Mikulas Patocka 提交者: Jens Axboe

Fix a crash when block device is read and block size is changed at the same time

The kernel may crash when block size is changed and I/O is issued
simultaneously.

Because some subsystems (udev or lvm) may read any block device anytime,
the bug actually puts any code that changes a block device size in
jeopardy.

The crash can be reproduced if you place "msleep(1000)" to
blkdev_get_blocks just before "bh->b_size = max_blocks <<
inode->i_blkbits;".
Then, run "dd if=/dev/ram0 of=/dev/null bs=4k count=1 iflag=direct"
While it is waiting in msleep, run "blockdev --setbsz 2048 /dev/ram0"
You get a BUG.

The direct and non-direct I/O is written with the assumption that block
size does not change. It doesn't seem practical to fix these crashes
one-by-one there may be many crash possibilities when block size changes
at a certain place and it is impossible to find them all and verify the
code.

This patch introduces a new rw-lock bd_block_size_semaphore. The lock is
taken for read during I/O. It is taken for write when changing block
size. Consequently, block size can't be changed while I/O is being
submitted.

For asynchronous I/O, the patch only prevents block size change while
the I/O is being submitted. The block size can change when the I/O is in
progress or when the I/O is being finished. This is acceptable because
there are no accesses to block size when asynchronous I/O is being
finished.

The patch prevents block size changing while the device is mapped with
mmap.
Signed-off-by: NMikulas Patocka <mpatocka@redhat.com>
Signed-off-by: NJens Axboe <axboe@kernel.dk>
上级 60ea8226
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations raw_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.write = do_sync_write,
.aio_write = blkdev_aio_write,
.fsync = blkdev_fsync,
......
......@@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev);
int set_blocksize(struct block_device *bdev, int size)
{
struct address_space *mapping;
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
......@@ -124,6 +126,20 @@ int set_blocksize(struct block_device *bdev, int size)
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
/* Prevent starting I/O or mapping the device */
down_write(&bdev->bd_block_size_semaphore);
/* Check that the block device is not memory mapped */
mapping = bdev->bd_inode->i_mapping;
mutex_lock(&mapping->i_mmap_mutex);
if (!prio_tree_empty(&mapping->i_mmap) ||
!list_empty(&mapping->i_mmap_nonlinear)) {
mutex_unlock(&mapping->i_mmap_mutex);
up_write(&bdev->bd_block_size_semaphore);
return -EBUSY;
}
mutex_unlock(&mapping->i_mmap_mutex);
/* Don't change the size if it is same as current */
if (bdev->bd_block_size != size) {
sync_blockdev(bdev);
......@@ -131,6 +147,9 @@ int set_blocksize(struct block_device *bdev, int size)
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}
up_write(&bdev->bd_block_size_semaphore);
return 0;
}
......@@ -472,6 +491,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
init_rwsem(&bdev->bd_block_size_semaphore);
}
static inline void __bd_forget(struct inode *inode)
......@@ -1567,6 +1587,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return blkdev_ioctl(bdev, mode, cmd, arg);
}
ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
ssize_t ret;
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
up_read(&bdev->bd_block_size_semaphore);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_read);
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
......@@ -1578,12 +1614,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
blk_start_plug(&plug);
down_read(&bdev->bd_block_size_semaphore);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
......@@ -1592,11 +1632,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
up_read(&bdev->bd_block_size_semaphore);
blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_mmap(file, vma);
up_read(&bdev->bd_block_size_semaphore);
return ret;
}
/*
* Try to release a page associated with block device when the system
* is under memory pressure.
......@@ -1627,9 +1685,9 @@ const struct file_operations def_blk_fops = {
.llseek = block_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_read = blkdev_aio_read,
.aio_write = blkdev_aio_write,
.mmap = generic_file_mmap,
.mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
......
......@@ -725,6 +725,8 @@ struct block_device {
int bd_fsfreeze_count;
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
/* A semaphore that prevents I/O while block size is being changed */
struct rw_semaphore bd_block_size_semaphore;
};
/*
......@@ -2565,6 +2567,8 @@ extern int generic_segment_checks(const struct iovec *iov,
unsigned long *nr_segs, size_t *count, int access_flags);
/* fs/block_dev.c */
extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部