提交 61cce6f6 编写于 作者: Jens Axboe 提交者: Martin K. Petersen

scsi: sd: use mempool for discard special page

When boxes are run near (or to) OOM, we have a problem with the discard
page allocation in sd. If we fail allocating the special page, we return
busy, and it'll get retried. But since ordering is honored for dispatch
requests, we can keep retrying this same IO and failing. Behind that IO
could be requests that want to free memory, but they never get the
chance. This means you get repeated spews of traces like this:

[1201401.625972] Call Trace:
[1201401.631748]  dump_stack+0x4d/0x65
[1201401.639445]  warn_alloc+0xec/0x190
[1201401.647335]  __alloc_pages_slowpath+0xe84/0xf30
[1201401.657722]  ? get_page_from_freelist+0x11b/0xb10
[1201401.668475]  ? __alloc_pages_slowpath+0x2e/0xf30
[1201401.679054]  __alloc_pages_nodemask+0x1f9/0x210
[1201401.689424]  alloc_pages_current+0x8c/0x110
[1201401.699025]  sd_setup_write_same16_cmnd+0x51/0x150
[1201401.709987]  sd_init_command+0x49c/0xb70
[1201401.719029]  scsi_setup_cmnd+0x9c/0x160
[1201401.727877]  scsi_queue_rq+0x4d9/0x610
[1201401.736535]  blk_mq_dispatch_rq_list+0x19a/0x360
[1201401.747113]  blk_mq_sched_dispatch_requests+0xff/0x190
[1201401.758844]  __blk_mq_run_hw_queue+0x95/0xa0
[1201401.768653]  blk_mq_run_work_fn+0x2c/0x30
[1201401.777886]  process_one_work+0x14b/0x400
[1201401.787119]  worker_thread+0x4b/0x470
[1201401.795586]  kthread+0x110/0x150
[1201401.803089]  ? rescuer_thread+0x320/0x320
[1201401.812322]  ? kthread_park+0x90/0x90
[1201401.820787]  ? do_syscall_64+0x53/0x150
[1201401.829635]  ret_from_fork+0x29/0x40

Ensure that the discard page allocation has a mempool backing, so we
know we can make progress.

Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
上级 9e6371d3
...@@ -133,6 +133,7 @@ static DEFINE_MUTEX(sd_ref_mutex); ...@@ -133,6 +133,7 @@ static DEFINE_MUTEX(sd_ref_mutex);
static struct kmem_cache *sd_cdb_cache; static struct kmem_cache *sd_cdb_cache;
static mempool_t *sd_cdb_pool; static mempool_t *sd_cdb_pool;
static mempool_t *sd_page_pool;
static const char *sd_cache_types[] = { static const char *sd_cache_types[] = {
"write through", "none", "write back", "write through", "none", "write back",
...@@ -759,9 +760,10 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) ...@@ -759,9 +760,10 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
unsigned int data_len = 24; unsigned int data_len = 24;
char *buf; char *buf;
rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page) if (!rq->special_vec.bv_page)
return BLKPREP_DEFER; return BLKPREP_DEFER;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0; rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len; rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD; rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
...@@ -792,9 +794,10 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap) ...@@ -792,9 +794,10 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
u32 data_len = sdp->sector_size; u32 data_len = sdp->sector_size;
rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page) if (!rq->special_vec.bv_page)
return BLKPREP_DEFER; return BLKPREP_DEFER;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0; rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len; rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD; rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
...@@ -822,9 +825,10 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap) ...@@ -822,9 +825,10 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
u32 data_len = sdp->sector_size; u32 data_len = sdp->sector_size;
rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page) if (!rq->special_vec.bv_page)
return BLKPREP_DEFER; return BLKPREP_DEFER;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0; rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len; rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD; rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
...@@ -1286,7 +1290,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) ...@@ -1286,7 +1290,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
u8 *cmnd; u8 *cmnd;
if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
__free_page(rq->special_vec.bv_page); mempool_free(rq->special_vec.bv_page, sd_page_pool);
if (SCpnt->cmnd != scsi_req(rq)->cmd) { if (SCpnt->cmnd != scsi_req(rq)->cmd) {
cmnd = SCpnt->cmnd; cmnd = SCpnt->cmnd;
...@@ -3623,6 +3627,13 @@ static int __init init_sd(void) ...@@ -3623,6 +3627,13 @@ static int __init init_sd(void)
goto err_out_cache; goto err_out_cache;
} }
sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
if (!sd_page_pool) {
printk(KERN_ERR "sd: can't init discard page pool\n");
err = -ENOMEM;
goto err_out_ppool;
}
err = scsi_register_driver(&sd_template.gendrv); err = scsi_register_driver(&sd_template.gendrv);
if (err) if (err)
goto err_out_driver; goto err_out_driver;
...@@ -3630,6 +3641,9 @@ static int __init init_sd(void) ...@@ -3630,6 +3641,9 @@ static int __init init_sd(void)
return 0; return 0;
err_out_driver: err_out_driver:
mempool_destroy(sd_page_pool);
err_out_ppool:
mempool_destroy(sd_cdb_pool); mempool_destroy(sd_cdb_pool);
err_out_cache: err_out_cache:
...@@ -3656,6 +3670,7 @@ static void __exit exit_sd(void) ...@@ -3656,6 +3670,7 @@ static void __exit exit_sd(void)
scsi_unregister_driver(&sd_template.gendrv); scsi_unregister_driver(&sd_template.gendrv);
mempool_destroy(sd_cdb_pool); mempool_destroy(sd_cdb_pool);
mempool_destroy(sd_page_pool);
kmem_cache_destroy(sd_cdb_cache); kmem_cache_destroy(sd_cdb_cache);
class_unregister(&sd_disk_class); class_unregister(&sd_disk_class);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册