提交 69e55430 编写于 作者: Z Zhang Wensheng 提交者: Yang Yingliang

block: add a switch for precise iostat accounting

hulk inclusion
category: bugfix
bugzilla: 39265, https://gitee.com/openeuler/kernel/issues/I4WC06
CVE: NA

-----------------------------------------------

When the inflight IOs are slow and no new IOs are issued, we expect
iostat to reveal the IO hang problem. However, after
commit 9c6dea45 ("block: delete part_round_stats and switch to less
precise counting"), io_ticks and time_in_queue are not updated until
the end of an IO, so the avgqu-sz and %util columns of iostat will be zero.

To fix it, we could fall back to the implementation before commit
9c6dea45, but that may cause a performance regression on NVMe devices
or bio-based devices (due to the overhead of the inflight calculation),
so add a switch to control whether or not to use precise iostat
accounting. It can be enabled by adding "precise_iostat=1" to the kernel
boot cmdline. When precise accounting is enabled, io_ticks and time_in_queue
will be updated when accessing /proc/diskstats and
/sys/block/sdX/sdXN/stat.

Fixes: 9c6dea45 ("block: delete part_round_stats and switch to less precise counting")
Signed-off-by: Zhang Wensheng <zhangwensheng5@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 e1c15e20
......@@ -1706,9 +1706,13 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
const int sgrp = op_stat_group(req_op);
int cpu = part_stat_lock();
update_io_ticks(cpu, part, now);
if (precise_iostat) {
part_round_stats(q, cpu, part);
} else {
update_io_ticks(cpu, part, now);
part_stat_add(cpu, part, time_in_queue, duration);
}
part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_add(cpu, part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock();
......
......@@ -56,6 +56,20 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
DEFINE_IDA(blk_queue_ida);
bool precise_iostat;
/*
 * Boot-time handler for the "precise_iostat=" command-line option.
 *
 * @str: the text following "precise_iostat=" on the kernel command line.
 *
 * If @str parses as a boolean, the global precise_iostat switch is set
 * to that value and the result is logged. If parsing fails, the switch
 * is left untouched. Returns 1 in every case.
 */
static int __init precise_iostat_setup(char *str)
{
	bool enabled;

	/* Unparseable value: keep the current setting. */
	if (strtobool(str, &enabled))
		return 1;

	precise_iostat = enabled;
	pr_info("precise iostat %d\n", precise_iostat);

	return 1;
}
__setup("precise_iostat=", precise_iostat_setup);
/*
* For the allocated request tables
*/
......@@ -1700,8 +1714,13 @@ static void part_round_stats_single(struct request_queue *q, int cpu,
struct hd_struct *part, unsigned long now,
unsigned int inflight)
{
if (inflight)
if (inflight) {
if (precise_iostat) {
__part_stat_add(cpu, part, time_in_queue,
inflight * (now - part->stamp));
}
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
}
part->stamp = now;
}
......@@ -2771,10 +2790,15 @@ void blk_account_io_done(struct request *req, u64 now)
cpu = part_stat_lock();
part = req->part;
update_io_ticks(cpu, part, jiffies);
if (!precise_iostat) {
update_io_ticks(cpu, part, jiffies);
part_stat_add(cpu, part, time_in_queue,
nsecs_to_jiffies64(now - req->start_time_ns));
} else {
part_round_stats(req->q, cpu, part);
}
part_stat_inc(cpu, part, ios[sgrp]);
part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
part_stat_add(cpu, part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
......
......@@ -669,6 +669,8 @@ static void blk_account_io_merge(struct request *req)
cpu = part_stat_lock();
part = req->part;
if (precise_iostat)
part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part);
......
......@@ -1352,6 +1352,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
unsigned int inflight[2];
int cpu;
/*
if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
......@@ -1363,6 +1364,12 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
while ((hd = disk_part_iter_next(&piter))) {
if (precise_iostat) {
cpu = part_stat_lock();
part_round_stats(gp->queue, cpu, hd);
part_stat_unlock();
}
part_in_flight(gp->queue, hd, inflight);
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
......
......@@ -18,6 +18,7 @@
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/blktrace_api.h>
#include <linux/blkdev.h>
#include "partitions/check.h"
......@@ -121,6 +122,13 @@ ssize_t part_stat_show(struct device *dev,
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q = part_to_disk(p)->queue;
unsigned int inflight[2];
int cpu;
if (precise_iostat) {
cpu = part_stat_lock();
part_round_stats(q, cpu, p);
part_stat_unlock();
}
part_in_flight(q, p, inflight);
return sprintf(buf,
......
......@@ -28,6 +28,7 @@
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
extern bool precise_iostat;
struct module;
struct scsi_ioctl_command;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册