提交 dc61ad52 编写于 作者: J Joseph Qi 提交者: Caspar Zhang

alinux: blk-throttle: support io delay stats

Add blkio.throttle.io_service_time and blkio.throttle.io_wait_time to
expose per-cgroup io delay statistics.
io_service_time is the time from the point a bio leaves block throttling
until the io completes, while io_wait_time is the time the bio spends on
the throttle queue.
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Acked-by: Caspar Zhang <caspar@linux.alibaba.com>
上级 2cde0dfb
......@@ -1776,6 +1776,10 @@ void bio_endio(struct bio *bio)
blk_throtl_bio_endio(bio);
/* release cgroup info */
bio_uninit(bio);
#ifdef CONFIG_BLK_DEV_THROTTLING
if (bio->bi_tg_end_io)
bio->bi_tg_end_io(bio);
#endif
if (bio->bi_end_io)
bio->bi_end_io(bio);
}
......
......@@ -177,6 +177,11 @@ struct throtl_grp {
unsigned int bio_cnt; /* total bios */
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
/* total time spent on lower layer: scheduler, device and others */
struct blkg_rwstat service_time;
/* total time spent on block throttle */
struct blkg_rwstat wait_time;
};
/* We measure latency for request size from <= 4k to >= 1M */
......@@ -490,6 +495,10 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
if (!tg)
return NULL;
if (blkg_rwstat_init(&tg->service_time, gfp) ||
blkg_rwstat_init(&tg->wait_time, gfp))
goto err;
throtl_service_queue_init(&tg->service_queue);
for (rw = READ; rw <= WRITE; rw++) {
......@@ -514,6 +523,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
return &tg->pd;
err:
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
kfree(tg);
return NULL;
}
static void throtl_pd_init(struct blkg_policy_data *pd)
......@@ -597,6 +612,8 @@ static void throtl_upgrade_state(struct throtl_data *td);
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
struct throtl_grp *tg = pd_to_tg(pd);
struct blkcg_gq *blkg = pd_to_blkg(pd);
struct blkcg_gq *parent = blkg->parent;
tg->bps[READ][LIMIT_LOW] = 0;
tg->bps[WRITE][LIMIT_LOW] = 0;
......@@ -607,6 +624,12 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
if (!tg->td->limit_valid[tg->td->limit_index])
throtl_upgrade_state(tg->td);
if (parent) {
blkg_rwstat_add_aux(&blkg_to_tg(parent)->service_time,
&tg->service_time);
blkg_rwstat_add_aux(&blkg_to_tg(parent)->wait_time,
&tg->wait_time);
}
}
static void throtl_pd_free(struct blkg_policy_data *pd)
......@@ -614,9 +637,19 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
struct throtl_grp *tg = pd_to_tg(pd);
del_timer_sync(&tg->service_queue.pending_timer);
blkg_rwstat_exit(&tg->service_time);
blkg_rwstat_exit(&tg->wait_time);
kfree(tg);
}
/*
 * Reset hook for the throttle policy data: clears both delay counters
 * (service_time and wait_time) of the throttle group that embeds @pd.
 */
static void throtl_pd_reset(struct blkg_policy_data *pd)
{
	struct throtl_grp *grp = pd_to_tg(pd);
	struct blkg_rwstat *service = &grp->service_time;
	struct blkg_rwstat *wait = &grp->wait_time;

	blkg_rwstat_reset(service);
	blkg_rwstat_reset(wait);
}
static struct throtl_grp *
throtl_rb_first(struct throtl_service_queue *parent_sq)
{
......@@ -1034,6 +1067,51 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
return false;
}
/*
 * Account the delays of one completed bio to throttle group @tg.
 *
 * @start_time:    timestamp taken when the bio was handed to block throttle
 * @io_start_time: timestamp taken when the bio was no longer throttled
 * @op:            operation value forwarded unchanged to blkg_rwstat_add()
 *
 * The completion time is sampled here with sched_clock().  The interval
 * [io_start_time, now] is added to tg->service_time and the interval
 * [start_time, io_start_time] to tg->wait_time.  An interval is skipped
 * when its end is not strictly after its start, so the unsigned
 * subtraction can never wrap.  Both counter updates are done with local
 * interrupts disabled.
 */
static void throtl_stats_update_completion(struct throtl_grp *tg,
					   uint64_t start_time,
					   uint64_t io_start_time,
					   int op)
{
	uint64_t end_ns = sched_clock();
	bool have_service = time_after64(end_ns, io_start_time);
	bool have_wait = time_after64(io_start_time, start_time);
	unsigned long irq_state;

	local_irq_save(irq_state);

	if (have_service)
		blkg_rwstat_add(&tg->service_time, op,
				end_ns - io_start_time);

	if (have_wait)
		blkg_rwstat_add(&tg->wait_time, op,
				io_start_time - start_time);

	local_irq_restore(irq_state);
}
static void throtl_bio_end_io(struct bio *bio)
{
struct throtl_grp *tg;
rcu_read_lock();
tg = (struct throtl_grp *)bio->bi_tg_private;
if (!tg)
goto out;
throtl_stats_update_completion(tg, bio_start_time_ns(bio),
bio_io_start_time_ns(bio),
bio_op(bio));
blkg_put(tg_to_blkg(tg));
out:
rcu_read_unlock();
}
/*
 * Arm delay accounting for @bio on behalf of throttle group @tg.
 *
 * Only REQ_OP_READ and REQ_OP_WRITE bios are tracked; any other operation
 * returns without touching the bio.  For a tracked bio the completion hook
 * and the owning group are recorded in the bio, the start timestamp is
 * taken, and a reference on the group's blkg is acquired (released again
 * in throtl_bio_end_io()).
 */
static inline void throtl_bio_stats_start(struct bio *bio, struct throtl_grp *tg)
{
	switch (bio_op(bio)) {
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		break;
	default:
		return;
	}

	bio->bi_tg_end_io = throtl_bio_end_io;
	bio->bi_tg_private = tg;
	bio_set_start_time_ns(bio);
	blkg_get(tg_to_blkg(tg));
}
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
......@@ -1477,6 +1555,32 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
return tg_set_conf(of, buf, nbytes, off, false);
}
static u64 tg_prfill_rwstat_field(struct seq_file *sf,
struct blkg_policy_data *pd,
int off)
{
struct throtl_grp *tg = pd_to_tg(pd);
struct blkg_rwstat rwstat = blkg_rwstat_read((void *)tg + off);
return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
/*
 * seq_show handler for "throttle.io_service_time".
 *
 * Walks the blkgs of the cgroup behind @sf and prints, for each one, the
 * rwstat selected by the cftype's private offset.  Always returns 0.
 */
static int tg_print_service_time(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	int field_off = seq_cft(sf)->private;

	blkcg_print_blkgs(sf, blkcg, tg_prfill_rwstat_field,
			  &blkcg_policy_throtl, field_off, true);

	return 0;
}
/*
 * seq_show handler for "throttle.io_wait_time".
 *
 * Walks the blkgs of the cgroup behind @sf and prints, for each one, the
 * rwstat selected by the cftype's private offset.  Always returns 0.
 */
static int tg_print_wait_time(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	int field_off = seq_cft(sf)->private;

	blkcg_print_blkgs(sf, blkcg, tg_prfill_rwstat_field,
			  &blkcg_policy_throtl, field_off, true);

	return 0;
}
static struct cftype throtl_legacy_files[] = {
{
.name = "throttle.read_bps_device",
......@@ -1522,6 +1626,16 @@ static struct cftype throtl_legacy_files[] = {
.private = (unsigned long)&blkcg_policy_throtl,
.seq_show = blkg_print_stat_ios_recursive,
},
{
.name = "throttle.io_service_time",
.private = offsetof(struct throtl_grp, service_time),
.seq_show = tg_print_service_time,
},
{
.name = "throttle.io_wait_time",
.private = offsetof(struct throtl_grp, wait_time),
.seq_show = tg_print_wait_time,
},
{ } /* terminate */
};
......@@ -1747,6 +1861,7 @@ static struct blkcg_policy blkcg_policy_throtl = {
.pd_online_fn = throtl_pd_online,
.pd_offline_fn = throtl_pd_offline,
.pd_free_fn = throtl_pd_free,
.pd_reset_stats_fn = throtl_pd_reset,
};
static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
......@@ -2150,7 +2265,12 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
WARN_ON_ONCE(!rcu_read_lock_held());
/* see throtl_charge_bio() */
if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
if (bio_flagged(bio, BIO_THROTTLED))
goto out;
throtl_bio_stats_start(bio, tg);
if (!tg->has_rules[rw])
goto out;
spin_lock_irq(q->queue_lock);
......@@ -2241,6 +2361,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
out_unlock:
spin_unlock_irq(q->queue_lock);
out:
if (!throttled)
bio_set_io_start_time_ns(bio);
bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
......
......@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/ktime.h>
#include <linux/sched/clock.h>
struct bio_set;
struct bio;
......@@ -181,6 +182,12 @@ struct bio {
struct cgroup_subsys_state *bi_css;
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#ifdef CONFIG_BLK_DEV_THROTTLING
unsigned long long start_time_ns; /* when passed to block throttle */
unsigned long long io_start_time_ns; /* when no more throttle */
bio_end_io_t *bi_tg_end_io;
void *bi_tg_private;
#endif
#ifdef CONFIG_BLK_CGROUP_IOCOST
u64 bi_iocost_cost;
#endif
......@@ -259,6 +266,32 @@ struct bio {
*/
#define BIO_RESET_BITS BVEC_POOL_OFFSET
#ifdef CONFIG_BLK_DEV_THROTTLING
/*
 * Stamp @bio with the current sched_clock() value as its start time
 * (the moment it is passed to block throttle).  The clock is read with
 * preemption disabled.
 */
static inline void bio_set_start_time_ns(struct bio *bio)
{
	unsigned long long stamp;

	preempt_disable();
	stamp = sched_clock();
	bio->start_time_ns = stamp;
	preempt_enable();
}
/*
 * Stamp @bio with the current sched_clock() value as its io start time
 * (the moment it is no longer throttled).  The clock is read with
 * preemption disabled.
 */
static inline void bio_set_io_start_time_ns(struct bio *bio)
{
	unsigned long long stamp;

	preempt_disable();
	stamp = sched_clock();
	bio->io_start_time_ns = stamp;
	preempt_enable();
}
/* Return the timestamp stored in @bio by bio_set_start_time_ns(). */
static inline uint64_t bio_start_time_ns(struct bio *bio)
{
	uint64_t stamp = bio->start_time_ns;

	return stamp;
}
/* Return the timestamp stored in @bio by bio_set_io_start_time_ns(). */
static inline uint64_t bio_io_start_time_ns(struct bio *bio)
{
	uint64_t stamp = bio->io_start_time_ns;

	return stamp;
}
#endif
typedef __u32 __bitwise blk_mq_req_flags_t;
/*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册