提交 4da72359 编写于 作者: Johannes Weiner 提交者: zhongjiang-ali

block: annotate refault stalls from IO submission

task #28327019

commit b8e24a9300b0836a9d39f6b20746766b3b81f1bd upstream

psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.
Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: zhongjiang-ali <zhongjiang-ali@linux.alibaba.com>
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
上级 422652e5
...@@ -803,6 +803,9 @@ void __bio_add_page(struct bio *bio, struct page *page, ...@@ -803,6 +803,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
bio->bi_iter.bi_size += len; bio->bi_iter.bi_size += len;
bio->bi_vcnt++; bio->bi_vcnt++;
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
bio_set_flag(bio, BIO_WORKINGSET);
} }
EXPORT_SYMBOL_GPL(__bio_add_page); EXPORT_SYMBOL_GPL(__bio_add_page);
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/psi.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/block.h> #include <trace/events/block.h>
...@@ -2544,6 +2545,10 @@ EXPORT_SYMBOL_GPL(direct_make_request); ...@@ -2544,6 +2545,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/ */
blk_qc_t submit_bio(struct bio *bio) blk_qc_t submit_bio(struct bio *bio)
{ {
bool workingset_read = false;
unsigned long pflags;
blk_qc_t ret;
/* /*
* If it's a regular read/write or a barrier with data attached, * If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission. * go through the normal accounting stuff before submission.
...@@ -2559,6 +2564,8 @@ blk_qc_t submit_bio(struct bio *bio) ...@@ -2559,6 +2564,8 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) { if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count); count_vm_events(PGPGOUT, count);
} else { } else {
if (bio_flagged(bio, BIO_WORKINGSET))
workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size); task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count); count_vm_events(PGPGIN, count);
} }
...@@ -2573,7 +2580,21 @@ blk_qc_t submit_bio(struct bio *bio) ...@@ -2573,7 +2580,21 @@ blk_qc_t submit_bio(struct bio *bio)
} }
} }
return generic_make_request(bio); /*
* If we're reading data that is part of the userspace
* workingset, count submission time as memory stall. When the
* device is congested, or the submitting cgroup IO-throttled,
* submission can be a significant part of overall IO time.
*/
if (workingset_read)
psi_memstall_enter(&pflags);
ret = generic_make_request(bio);
if (workingset_read)
psi_memstall_leave(&pflags);
return ret;
} }
EXPORT_SYMBOL(submit_bio); EXPORT_SYMBOL(submit_bio);
......
...@@ -245,6 +245,7 @@ struct bio { ...@@ -245,6 +245,7 @@ struct bio {
#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion
* of this bio. */ * of this bio. */
#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ #define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */
#define BIO_WORKINGSET 12 /* contains userspace workingset pages */
/* /*
* Extend bio flags should be added in here * Extend bio flags should be added in here
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册