提交 a22c4d7e 编写于 作者: M Ming Lin 提交者: Jens Axboe

block: re-add discard_granularity and alignment checks

In commit b49a0871 ("block: remove split code in
blkdev_issue_{discard,write_same}"), the discard_granularity and alignment
checks were removed. Ideally, with bio late splitting, the upper layers
shouldn't need to depend on the device's limits.

Christoph reported a discard regression on the HGST Ultrastar SN100 NVMe
device when running mkfs.xfs. We have not found the root cause yet.

This patch re-adds discard_granularity and alignment checks by reverting
the related changes in commit b49a0871. The good thing is now we can
remove the 2G discard size cap and just use UINT_MAX to avoid bi_size
overflow.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
上级 23d88271
...@@ -26,13 +26,6 @@ static void bio_batch_end_io(struct bio *bio) ...@@ -26,13 +26,6 @@ static void bio_batch_end_io(struct bio *bio)
bio_put(bio); bio_put(bio);
} }
/*
* Ensure that max discard sectors doesn't overflow bi_size and hopefully
* it is of the proper granularity as long as the granularity is a power
* of two.
*/
#define MAX_BIO_SECTORS ((1U << 31) >> 9)
/** /**
* blkdev_issue_discard - queue a discard * blkdev_issue_discard - queue a discard
* @bdev: blockdev to issue discard for * @bdev: blockdev to issue discard for
...@@ -50,6 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ...@@ -50,6 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
DECLARE_COMPLETION_ONSTACK(wait); DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev); struct request_queue *q = bdev_get_queue(bdev);
int type = REQ_WRITE | REQ_DISCARD; int type = REQ_WRITE | REQ_DISCARD;
unsigned int granularity;
int alignment;
struct bio_batch bb; struct bio_batch bb;
struct bio *bio; struct bio *bio;
int ret = 0; int ret = 0;
...@@ -61,6 +56,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ...@@ -61,6 +56,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (!blk_queue_discard(q)) if (!blk_queue_discard(q))
return -EOPNOTSUPP; return -EOPNOTSUPP;
/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
if (flags & BLKDEV_DISCARD_SECURE) { if (flags & BLKDEV_DISCARD_SECURE) {
if (!blk_queue_secdiscard(q)) if (!blk_queue_secdiscard(q))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -74,7 +73,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ...@@ -74,7 +73,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
blk_start_plug(&plug); blk_start_plug(&plug);
while (nr_sects) { while (nr_sects) {
unsigned int req_sects; unsigned int req_sects;
sector_t end_sect; sector_t end_sect, tmp;
bio = bio_alloc(gfp_mask, 1); bio = bio_alloc(gfp_mask, 1);
if (!bio) { if (!bio) {
...@@ -82,8 +81,22 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ...@@ -82,8 +81,22 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
break; break;
} }
req_sects = min_t(sector_t, nr_sects, MAX_BIO_SECTORS); /* Make sure bi_size doesn't overflow */
req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
/*
* If splitting a request, and the next starting sector would be
* misaligned, stop the discard at the previous aligned sector.
*/
end_sect = sector + req_sects; end_sect = sector + req_sects;
tmp = end_sect;
if (req_sects < nr_sects &&
sector_div(tmp, granularity) != alignment) {
end_sect = end_sect - alignment;
sector_div(end_sect, granularity);
end_sect = end_sect * granularity + alignment;
req_sects = end_sect - sector;
}
bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_sector = sector;
bio->bi_end_io = bio_batch_end_io; bio->bi_end_io = bio_batch_end_io;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册