提交 abbf198d 编写于 作者: J Jan Kara 提交者: Yang Yingliang

block: Do not discard buffers under a mounted filesystem

mainline inclusion
from mainline-v5.9
commit 384d87ef
category: bugfix
bugzilla: 41569
CVE: NA

-----------------------------------------------

Discarding blocks and buffers under a mounted filesystem is hardly
anything admin wants to do. Usually it will confuse the filesystem and
sometimes the loss of buffer_head state (including b_private field) can
even cause crashes like:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
PGD 0 P4D 0
Oops: 0002 [#1] SMP PTI
CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G O     --------- -  - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1
Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015
RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2]
...
Call Trace:
 __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2]
 jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2]
 kjournald2+0xbd/0x270 [jbd2]

So if we don't have block device open with O_EXCL already, claim the
block device while we truncate buffer cache. This makes sure any
exclusive block device user (such as filesystem) cannot operate on the
device while we are discarding buffer cache.
Reported-by: Ye Bin <yebin10@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[axboe: fix !CONFIG_BLOCK error in truncate_bdev_range()]

conflicts:
fs/block_dev.c
include/linux/blkdev.h
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 a210489b
...@@ -203,8 +203,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, ...@@ -203,8 +203,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
uint64_t range[2]; uint64_t range[2];
uint64_t start, len; uint64_t start, len;
struct request_queue *q = bdev_get_queue(bdev); struct request_queue *q = bdev_get_queue(bdev);
struct address_space *mapping = bdev->bd_inode->i_mapping; int err;
if (!(mode & FMODE_WRITE)) if (!(mode & FMODE_WRITE))
return -EBADF; return -EBADF;
...@@ -225,7 +224,11 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, ...@@ -225,7 +224,11 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
if (start + len > i_size_read(bdev->bd_inode)) if (start + len > i_size_read(bdev->bd_inode))
return -EINVAL; return -EINVAL;
truncate_inode_pages_range(mapping, start, start + len - 1);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
return err;
return blkdev_issue_discard(bdev, start >> 9, len >> 9, return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags); GFP_KERNEL, flags);
} }
...@@ -234,8 +237,8 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, ...@@ -234,8 +237,8 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
unsigned long arg) unsigned long arg)
{ {
uint64_t range[2]; uint64_t range[2];
struct address_space *mapping;
uint64_t start, end, len; uint64_t start, end, len;
int err;
if (!(mode & FMODE_WRITE)) if (!(mode & FMODE_WRITE))
return -EBADF; return -EBADF;
...@@ -257,8 +260,9 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, ...@@ -257,8 +260,9 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
return -EINVAL; return -EINVAL;
/* Invalidate the page cache, including dirty pages */ /* Invalidate the page cache, including dirty pages */
mapping = bdev->bd_inode->i_mapping; err = truncate_bdev_range(bdev, mode, start, end);
truncate_inode_pages_range(mapping, start, end); if (err)
return err;
return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP); BLKDEV_ZERO_NOUNMAP);
......
...@@ -1074,6 +1074,63 @@ static int bd_prepare_to_claim(struct block_device *bdev, ...@@ -1074,6 +1074,63 @@ static int bd_prepare_to_claim(struct block_device *bdev,
return 0; return 0;
} }
/*
 * bd_clear_claiming - finish an in-progress claim on @whole.
 *
 * Clears @whole->bd_claiming (which must currently equal @holder) and wakes
 * anyone sleeping in the claim-wait path.  Caller must hold bdev_lock — the
 * lockdep assertion below enforces that.
 */
static void bd_clear_claiming(struct block_device *whole, void *holder)
{
lockdep_assert_held(&bdev_lock);
/* tell others that we're done */
BUG_ON(whole->bd_claiming != holder);
whole->bd_claiming = NULL;
wake_up_bit(&whole->bd_claiming, 0);
}
/**
 * bd_abort_claiming - abort claiming of a block device
 * @bdev: block device of interest
 * @whole: whole block device (returned from bd_start_claiming())
 * @holder: holder that has claimed @bdev
 *
 * Abort claiming of a block device when the exclusive open failed. This can be
 * also used when exclusive open is not actually desired and we just needed
 * to block other exclusive openers for a while.
 */
void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
void *holder)
{
/* bd_clear_claiming() requires bdev_lock to be held */
spin_lock(&bdev_lock);
bd_clear_claiming(whole, holder);
spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
/*
 * truncate_bdev_range - drop all buffers & page cache for a given bdev range.
 * @bdev:   block device whose page cache is truncated
 * @mode:   open mode of the caller's handle (checked for FMODE_EXCL)
 * @lstart: first byte of the range
 * @lend:   last byte of the range (inclusive)
 *
 * This function bails with an error if bdev has another exclusive owner
 * (such as a mounted filesystem), so buffers are never discarded under a
 * live filesystem.  Returns 0 on success or a negative errno from the
 * claim attempt.
 */
int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
loff_t lstart, loff_t lend)
{
struct block_device *claimed_bdev = NULL;
int err;
/*
 * If we don't hold exclusive handle for the device, upgrade to it
 * while we discard the buffer cache to avoid discarding buffers
 * under live filesystem.
 */
if (!(mode & FMODE_EXCL)) {
/* the function address itself serves as the unique holder cookie */
claimed_bdev = bdev->bd_contains;
err = bd_prepare_to_claim(bdev, claimed_bdev,
truncate_bdev_range);
if (err)
return err;
}
truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
/* release the temporary claim taken above, waking any blocked openers */
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range);
return 0;
}
EXPORT_SYMBOL(truncate_bdev_range);
static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
{ {
struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
...@@ -2016,7 +2073,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, ...@@ -2016,7 +2073,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len) loff_t len)
{ {
struct block_device *bdev = I_BDEV(bdev_file_inode(file)); struct block_device *bdev = I_BDEV(bdev_file_inode(file));
struct address_space *mapping;
loff_t end = start + len - 1; loff_t end = start + len - 1;
loff_t isize; loff_t isize;
int error; int error;
...@@ -2044,8 +2100,9 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, ...@@ -2044,8 +2100,9 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
return -EINVAL; return -EINVAL;
/* Invalidate the page cache, including dirty pages. */ /* Invalidate the page cache, including dirty pages. */
mapping = bdev->bd_inode->i_mapping; error = truncate_bdev_range(bdev, file->f_mode, start, end);
truncate_inode_pages_range(mapping, start, end); if (error)
return error;
switch (mode) { switch (mode) {
case FALLOC_FL_ZERO_RANGE: case FALLOC_FL_ZERO_RANGE:
...@@ -2072,7 +2129,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, ...@@ -2072,7 +2129,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
* the caller will be given -EBUSY. The third argument is * the caller will be given -EBUSY. The third argument is
* inclusive, so the rounding here is safe. * inclusive, so the rounding here is safe.
*/ */
return invalidate_inode_pages2_range(mapping, return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
start >> PAGE_SHIFT, start >> PAGE_SHIFT,
end >> PAGE_SHIFT); end >> PAGE_SHIFT);
} }
......
...@@ -2551,6 +2551,8 @@ extern void bd_forget(struct inode *inode); ...@@ -2551,6 +2551,8 @@ extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *); extern void bdput(struct block_device *);
extern void invalidate_bdev(struct block_device *); extern void invalidate_bdev(struct block_device *);
extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
loff_t lend);
extern int sync_blockdev(struct block_device *bdev); extern int sync_blockdev(struct block_device *bdev);
extern void kill_bdev(struct block_device *); extern void kill_bdev(struct block_device *);
extern struct super_block *freeze_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *);
...@@ -2567,6 +2569,11 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) ...@@ -2567,6 +2569,11 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb)
} }
#else #else
static inline void bd_forget(struct inode *inode) {} static inline void bd_forget(struct inode *inode) {}
static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
loff_t lstart, loff_t lend)
{
return 0;
}
static inline int sync_blockdev(struct block_device *bdev) { return 0; } static inline int sync_blockdev(struct block_device *bdev) { return 0; }
static inline void kill_bdev(struct block_device *bdev) {} static inline void kill_bdev(struct block_device *bdev) {}
static inline void invalidate_bdev(struct block_device *bdev) {} static inline void invalidate_bdev(struct block_device *bdev) {}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册