diff --git a/block/blk-core.c b/block/blk-core.c index a7747ed9c7672dbb4883efcf6c171d368e8ae2b9..130709b4fc08c486ac396494ee903b97fc86d79b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1017,6 +1017,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, q->last_merge = NULL; q->end_sector = 0; q->boundary_rq = NULL; + q->rq_hang_threshold = BLK_REQ_HANG_THRESHOLD; q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index a5ea86835fcb2dc2bb7f1a1593637709bce62b2a..ba701e65446562c6941947921799d6737b71e9fa 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -213,6 +213,39 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf, return count; } +static void blk_mq_debugfs_rq_hang_show(struct seq_file *m, struct request *rq); + +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + struct seq_file *m = priv; + u64 now = ktime_get_ns(); + u64 duration; + + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration < rq->q->rq_hang_threshold) + return; + + /* See comments in blk_mq_check_expired() */ + if (!refcount_inc_not_zero(&rq->ref)) + return; + + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration >= rq->q->rq_hang_threshold) + blk_mq_debugfs_rq_hang_show(m, rq); + + if (refcount_dec_and_test(&rq->ref)) + __blk_mq_free_request(rq); +} + +static int queue_rq_hang_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + + blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, m); + return 0; +} + static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "poll_stat", 0400, queue_poll_stat_show }, { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops }, @@ -220,6 +253,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { { "state", 0600, queue_state_show, queue_state_write }, { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store }, { "zone_wlock", 0400, queue_zone_wlock_show, NULL }, + { "rq_hang", 0400, queue_rq_hang_show, NULL }, { }, }; @@ -381,6 +415,56 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) } EXPORT_SYMBOL_GPL(__blk_mq_debugfs_rq_show); +static void blk_mq_debugfs_rq_hang_show(struct seq_file *m, struct request *rq) +{ + const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; + const unsigned int op = rq->cmd_flags & REQ_OP_MASK; + struct bio *bio; + struct bio_vec *bvec; + int i; + + seq_printf(m, "%px {.op=", rq); + if (op < ARRAY_SIZE(op_name) && op_name[op]) + seq_printf(m, "%s", op_name[op]); + else + seq_printf(m, "%d", op); + seq_puts(m, ", .cmd_flags="); + blk_flags_show(m, rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name, + ARRAY_SIZE(cmd_flag_name)); + seq_puts(m, ", .rq_flags="); + blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, + ARRAY_SIZE(rqf_name)); + seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq))); + seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, + rq->internal_tag); + seq_printf(m, ", .start_time_ns=%llu", rq->start_time_ns); + seq_printf(m, ", .io_start_time_ns=%llu", rq->io_start_time_ns); + seq_printf(m, ", .current_time=%llu", ktime_get_ns()); + + bio = rq->bio; + if (bio) { + while (1) { + seq_printf(m, ", .bio = %px", bio); + seq_puts(m, ", .bio_pages = { "); + bio_for_each_segment_all(bvec, bio, i) { + struct page *page = bvec->bv_page; + + if (!page) + continue; + seq_printf(m, "%px ", page); + } + seq_puts(m, "}"); + bio = bio->bi_next; + if (bio == NULL) + break; + } + } + if (mq_ops->show_rq) + mq_ops->show_rq(m, rq); + seq_puts(m, "}\n"); + return; +} + int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) { return __blk_mq_debugfs_rq_show(m, list_entry_rq(v)); diff --git a/block/blk-mq.c b/block/blk-mq.c index 43a29e9886c1bcf2e60b15dd234c159c990b15c7..53b42883ad6a0bd65e9127c7b40ee7f9ac879c45 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -88,6 +88,11 @@ struct mq_inflight { unsigned int *inflight; }; +struct mq_hang { + struct hd_struct *part; + unsigned int *hang; +}; + static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) @@ -133,6 +138,30 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi); } +static void blk_mq_check_hang_rw(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + struct mq_hang *mh = priv; + u64 now = ktime_get_ns(); + u64 duration; + + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration < rq->q->rq_hang_threshold) + return; + + if (!mh->part->partno || rq->part == mh->part) + mh->hang[rq_data_dir(rq)]++; +} + +void blk_mq_in_hang_rw(struct request_queue *q, struct hd_struct *part, + unsigned int hang[2]) +{ + struct mq_hang mh = { .part = part, .hang = hang, }; + + hang[0] = hang[1] = 0; + blk_mq_queue_tag_busy_iter(q, blk_mq_check_hang_rw, &mh); +} + void blk_freeze_queue_start(struct request_queue *q) { int freeze_depth; @@ -476,7 +505,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -static void __blk_mq_free_request(struct request *rq) +void __blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; struct blk_mq_ctx *ctx = rq->mq_ctx; diff --git a/block/blk-mq.h b/block/blk-mq.h index 5ad9251627f80567e1fb459cf699f5d1a76a3342..23fc5e3dea43e78ceda78d07d6a17211e05c1290 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -154,6 +154,8 @@ void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); +void blk_mq_in_hang_rw(struct request_queue *q, struct hd_struct *part, + unsigned int hang[2]); static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 49fac8749a42160673dc3c1ed7d1a32db738d13b..fab05b0e69c7480aa96a3a50cdb17cfee736ee47 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -66,6 +66,13 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) } EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); +void blk_queue_rq_hang_threshold(struct request_queue *q, + unsigned int hang_threshold) +{ + q->rq_hang_threshold = hang_threshold; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_hang_threshold); + void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) { WARN_ON_ONCE(q->mq_ops); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8286640d4d663fe51a207421f5a4190ee2d9bfb8..474c84de0ca89946886efb28b16a42bec8421806 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -516,6 +516,27 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page) return queue_var_show(blk_queue_dax(q), page); } +static ssize_t queue_hang_threshold_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%u\n", q->rq_hang_threshold); +} + +static ssize_t queue_hang_threshold_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned int hang_threshold; + int err; + + err = kstrtou32(page, 10, &hang_threshold); + if (err || hang_threshold == 0) + return -EINVAL; + + blk_queue_rq_hang_threshold(q, hang_threshold); + + return count; +} + + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = 0644 }, .show = queue_requests_show, @@ -695,6 +716,12 @@ static struct queue_sysfs_entry queue_wb_lat_entry = { .store = queue_wb_lat_store, }; +static struct queue_sysfs_entry queue_hang_entry = { + .attr = {.name = "hang_threshold", .mode = 0644 }, + .show = queue_hang_threshold_show, + .store = queue_hang_threshold_store, +}; + #ifdef CONFIG_BLK_DEV_THROTTLING_LOW static struct queue_sysfs_entry throtl_sample_time_entry = { .attr = {.name = "throttle_sample_time", .mode = 0644 }, @@ -737,6 +764,7 @@ static struct attribute *default_attrs[] = { &queue_dax_entry.attr, &queue_wb_lat_entry.attr, &queue_poll_delay_entry.attr, + &queue_hang_entry.attr, #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &throtl_sample_time_entry.attr, #endif diff --git a/block/genhd.c b/block/genhd.c index ee4da1ba5afe80e3874ce8009d71a8b1aedc728f..81640b7bf85a1d73813f4a458b2cb1787b34d556 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -94,6 +94,39 @@ void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, inflight[1] = atomic_read(&part->in_flight[1]); } +static void blk_in_hang_rw(struct request_queue *q, struct hd_struct *part, + unsigned int hang[2]) +{ + u64 now, duration; + unsigned long flags; + struct request *rq, *tmp; + + now = ktime_get_ns(); + spin_lock_irqsave(q->queue_lock, flags); + + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration < rq->q->rq_hang_threshold) + continue; + + if (!part->partno || rq->part == part) + hang[rq_data_dir(rq)]++; + } + + spin_unlock_irqrestore(q->queue_lock, flags); +} + +void part_in_hang_rw(struct request_queue *q, struct hd_struct *part, + unsigned int hang[2]) +{ + if (q->mq_ops) { + blk_mq_in_hang_rw(q, part, hang); + return; + } + + blk_in_hang_rw(q, part, hang); +} + struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); @@ -1157,6 +1190,7 @@ static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL); static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); +static DEVICE_ATTR(hang, 0444, part_hang_show, NULL); static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = @@ -1179,6 +1213,7 @@ static struct attribute *disk_attrs[] = { &dev_attr_capability.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, + &dev_attr_hang.attr, &dev_attr_badblocks.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, diff --git a/block/partition-generic.c b/block/partition-generic.c index ed4bc5f7967e83d4002065fde0251514c97e5632..d9d9b656a1f0ddd82869939888cd6acd7e5fc59c 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -164,6 +164,17 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } +ssize_t part_hang_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct request_queue *q = part_to_disk(p)->queue; + unsigned int hang[2]; + + part_in_hang_rw(q, p, hang); + return sprintf(buf, "%8u %8u\n", hang[0], hang[1]); +} + #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -195,6 +206,7 @@ static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); +static DEVICE_ATTR(hang, 0444, part_hang_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); @@ -209,6 +221,7 @@ static struct attribute *part_attrs[] = { &dev_attr_discard_alignment.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, + &dev_attr_hang.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 75086cb24b36e1a5dfecdd321baea48dc9b5ff05..4d72b4ebfbc4560357509f8fed63a9506c9b32c3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -233,6 +233,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); +void __blk_mq_free_request(struct request *rq); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf883af195893d94b4550ab4c9faea9937b77237..545a39090984cefe9eb86ce02ddda1e956c0d0e2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -436,6 +436,12 @@ static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, #endif /* CONFIG_BLK_DEV_ZONED */ +/* + * default request hang threshold, unit is millisecond. If one request does + * not complete in this threashold time, consider this request as hang. + */ +#define BLK_REQ_HANG_THRESHOLD 5000 + struct request_queue { /* * Together with queue_head for cacheline sharing @@ -581,6 +587,7 @@ struct request_queue { unsigned int request_fn_active; unsigned int rq_timeout; + unsigned int rq_hang_threshold; int poll_nsec; struct blk_stat_callback *poll_cb; @@ -1264,6 +1271,8 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); +extern void blk_queue_rq_hang_threshold(struct request_queue *q, + unsigned int threshold); /* * Number of physical segments as sent to the device. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 75ccb2848c11a14df31c54053ecba97fa8ecbb5a..bd2bcd2961930087c5a90f6709aac94e4055b706 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -398,6 +398,8 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw); void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw); +void part_in_hang_rw(struct request_queue *q, struct hd_struct *part, + unsigned int hang[2]); static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) { @@ -643,6 +645,8 @@ extern ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t part_hang_show(struct device *dev, + struct device_attribute *attr, char *buf); #ifdef CONFIG_FAIL_MAKE_REQUEST extern ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf);