From 792097079b91ce133c122a18e176b44757d0ec37 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Thu, 7 Nov 2019 11:39:35 +0800 Subject: [PATCH] alinux: fs: record page or bio info while process is waiting on it If a process context is stuck in wait_on_buffer(), lock_buffer(), lock_page(), wait_on_page_writeback() or wait_on_bit_io(), it's hard to tell the true reason, for example, whether this page is under io, or this page is just locked for too long by another process context. Normally an io request has multiple bios, and every bio contains multiple pages which hold data to be read from or written to the device, so here we record page info or bio info in task_struct while the process calls lock_page(), lock_buffer(), wait_on_page_writeback(), wait_on_buffer() and wait_on_bit_io(), and we add a new proc interface: [lege@localhost linux]$ cat /proc/4516/wait_res 1 ffffd0969f95d3c0 4295369599 4295381596 The above info means that thread 4516 is waiting on a page whose address is ffffd0969f95d3c0, and has waited for 11997 jiffies (11997ms with HZ=1000). The first field denotes the type of resource the process is waiting on (1 for a page, 2 for a bio). The second field denotes the address of that page or bio. The third field denotes the moment the wait began and the fourth denotes the current moment, both in jiffies. In practice, if we find a process waiting on one page for too long, we can get the page's address by reading /proc/$pid/wait_res, and search for this page address in all block devices' /sys/kernel/debug/block/${devname}/rq_hang; if the search hits one, we can get the request and know why this io request has hung that long. 
Reviewed-by: Joseph Qi Signed-off-by: Xiaoguang Wang --- block/bio.c | 3 +++ fs/jbd2/transaction.c | 2 ++ fs/proc/base.c | 11 +++++++++++ include/linux/buffer_head.h | 10 ++++++++-- include/linux/pagemap.h | 10 ++++++++-- include/linux/sched.h | 30 ++++++++++++++++++++++++++++++ 6 files changed, 62 insertions(+), 4 deletions(-) diff --git a/block/bio.c b/block/bio.c index 31d6b9b3dc6e..89c862a1c433 100644 --- a/block/bio.c +++ b/block/bio.c @@ -977,7 +977,10 @@ int submit_bio_wait(struct bio *bio) bio->bi_end_io = submit_bio_wait_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); + + task_set_wait_res(TASK_WAIT_BIO, bio); wait_for_completion_io(&done); + task_clear_wait_res(); return blk_status_to_errno(bio->bi_status); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 524953bf0994..cfe3810bccfd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -960,7 +960,9 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); + task_set_wait_res(TASK_WAIT_PAGE, bh->b_page); wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); + task_clear_wait_res(); goto repeat; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b9b726b1a6c..92d822d320e9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -558,6 +558,15 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, return 0; } +static int proc_wait_res(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%d %px %lu %lu\n", task->wait_res_type, task->wait_page, + task->wait_moment, jiffies); + + return 0; +} + struct limit_names { const char *name; const char *unit; @@ -3041,6 +3050,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif + ONE("wait_res", S_IRUGO, proc_wait_res), }; static int proc_tgid_base_readdir(struct file *file, 
struct dir_context *ctx) @@ -3419,6 +3429,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif + ONE("wait_res", S_IRUGO, proc_wait_res), }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 96225a77c112..180d91f6a45b 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -350,8 +350,11 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) static inline void wait_on_buffer(struct buffer_head *bh) { might_sleep(); - if (buffer_locked(bh)) + if (buffer_locked(bh)) { + task_set_wait_res(TASK_WAIT_PAGE, bh->b_page); __wait_on_buffer(bh); + task_clear_wait_res(); + } } static inline int trylock_buffer(struct buffer_head *bh) @@ -362,8 +365,11 @@ static inline int trylock_buffer(struct buffer_head *bh) static inline void lock_buffer(struct buffer_head *bh) { might_sleep(); - if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) { + task_set_wait_res(TASK_WAIT_PAGE, bh->b_page); __lock_buffer(bh); + task_clear_wait_res(); + } } static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b1bd2186e6d2..40c87e5dd0c8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -479,8 +479,11 @@ static inline int trylock_page(struct page *page) static inline void lock_page(struct page *page) { might_sleep(); - if (!trylock_page(page)) + if (!trylock_page(page)) { + task_set_wait_res(TASK_WAIT_PAGE, page); __lock_page(page); + task_clear_wait_res(); + } } /* @@ -542,8 +545,11 @@ static inline int wait_on_page_locked_killable(struct page *page) */ static inline void wait_on_page_writeback(struct page *page) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + task_set_wait_res(TASK_WAIT_PAGE, page); wait_on_page_bit(page, PG_writeback); + 
task_clear_wait_res(); + } } extern void end_page_writeback(struct page *page); diff --git a/include/linux/sched.h b/include/linux/sched.h index fe40353fa920..dbd927399de8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1217,6 +1217,14 @@ struct task_struct { void *security; #endif + + int wait_res_type; + union { + struct page *wait_page; + struct bio *wait_bio; + }; + unsigned long wait_moment; + ALI_HOTFIX_RESERVE(1) ALI_HOTFIX_RESERVE(2) ALI_HOTFIX_RESERVE(3) @@ -1243,6 +1251,28 @@ struct task_struct { */ }; +enum { + TASK_WAIT_PAGE = 1, + TASK_WAIT_BIO, +}; + +static inline void task_set_wait_res(int type, void *res) +{ + if (type == TASK_WAIT_PAGE) + current->wait_page = (struct page *)res; + else if (type == TASK_WAIT_BIO) + current->wait_bio = (struct bio *)res; + + current->wait_res_type = type; + current->wait_moment = jiffies; +} + +static inline void task_clear_wait_res(void) +{ + current->wait_page = NULL; + current->wait_res_type = 0; +} + static inline struct pid *task_pid(struct task_struct *task) { return task->thread_pid; -- GitLab