From 88ec97eca1f970bbcbbe503fcc53e1b95b0fd821 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 15 Nov 2019 17:34:36 -0800 Subject: [PATCH] mm: fix trying to reclaim unevictable lru page when calling madvise_pageout commit 82072962973008201b817fae1896512977dd5083 upstream Recently, I hit the following issue when running upstream. kernel BUG at mm/vmscan.c:1521! invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 PID: 23385 Comm: syz-executor.6 Not tainted 5.4.0-rc4+ #1 RIP: 0010:shrink_page_list+0x12b6/0x3530 mm/vmscan.c:1521 Call Trace: reclaim_pages+0x499/0x800 mm/vmscan.c:2188 madvise_cold_or_pageout_pte_range+0x58a/0x710 mm/madvise.c:453 walk_pmd_range mm/pagewalk.c:53 [inline] walk_pud_range mm/pagewalk.c:112 [inline] walk_p4d_range mm/pagewalk.c:139 [inline] walk_pgd_range mm/pagewalk.c:166 [inline] __walk_page_range+0x45a/0xc20 mm/pagewalk.c:261 walk_page_range+0x179/0x310 mm/pagewalk.c:349 madvise_pageout_page_range mm/madvise.c:506 [inline] madvise_pageout+0x1f0/0x330 mm/madvise.c:542 madvise_vma mm/madvise.c:931 [inline] __do_sys_madvise+0x7d2/0x1600 mm/madvise.c:1113 do_syscall_64+0x9f/0x4c0 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe madvise_pageout() accesses the specified range of the vma and isolates them, then runs shrink_page_list() to reclaim its memory. But it also isolates the unevictable pages to reclaim. Hence, we can catch the cases in shrink_page_list(). The root cause is that we scan the page tables instead of specific LRU list. and so we need to filter out the unevictable lru pages from our end. Link: http://lkml.kernel.org/r/1572616245-18946-1-git-send-email-zhongjiang@huawei.com Fixes: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT") Signed-off-by: zhong jiang Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Reviewed-by: Yang Shi Signed-off-by: Xunlei Pang --- mm/madvise.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 9b7b50ea26b2..94509104d133 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -381,8 +381,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, ClearPageReferenced(page); test_and_clear_page_young(page); if (pageout) { - if (!isolate_lru_page(page)) - list_add(&page->lru, &page_list); + if (!isolate_lru_page(page)) { + if (PageUnevictable(page)) + putback_lru_page(page); + else + list_add(&page->lru, &page_list); + } } else deactivate_page(page); huge_unlock: @@ -459,8 +463,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, ClearPageReferenced(page); test_and_clear_page_young(page); if (pageout) { - if (!isolate_lru_page(page)) - list_add(&page->lru, &page_list); + if (!isolate_lru_page(page)) { + if (PageUnevictable(page)) + putback_lru_page(page); + else + list_add(&page->lru, &page_list); + } } else deactivate_page(page); } -- GitLab