diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d85889710f9b6a4fbf5040fd941c1cfca7847428..fd5be240c0b72f075d135d2bc0e00c7d7dd9aca8 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -40,6 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSCAN_ZONE_RECLAIM_FAILED, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, + KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW, PAGEOUTRUN, ALLOCSTALL, PGROTATED, #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, diff --git a/mm/vmscan.c b/mm/vmscan.c index 61d3a9a0d96f87ec0ff29fdb68908ca13a810e7b..e176bd3936dad2a353e1747de62d128e79f0587b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1904,6 +1904,24 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, } #endif +/* is kswapd sleeping prematurely? */ +static int sleeping_prematurely(int order, long remaining) +{ + struct zone *zone; + + /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ + if (remaining) + return 1; + + /* If after HZ/10, a zone is below the high mark, it's premature */ + for_each_populated_zone(zone) + if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), + 0, 0)) + return 1; + + return 0; +} + /* * For kswapd, balance_pgdat() will work across all this node's zones until * they are all at high_wmark_pages(zone). @@ -2185,8 +2203,30 @@ static int kswapd(void *p) */ order = new_order; } else { - if (!freezing(current) && !kthread_should_stop()) - schedule(); + if (!freezing(current) && !kthread_should_stop()) { + long remaining = 0; + + /* Try to sleep for a short interval */ + if (!sleeping_prematurely(order, remaining)) { + remaining = schedule_timeout(HZ/10); + finish_wait(&pgdat->kswapd_wait, &wait); + prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); + } + + /* + * After a short sleep, check if it was a + * premature sleep. If not, then go fully + * to sleep until explicitly woken up + */ + if (!sleeping_prematurely(order, remaining)) + schedule(); + else { + if (remaining) + count_vm_event(KSWAPD_PREMATURE_FAST); + else + count_vm_event(KSWAPD_PREMATURE_SLOW); + } + } order = pgdat->kswapd_max_order; } diff --git a/mm/vmstat.c b/mm/vmstat.c index dad2327e45804e16a8ff07564a80eb0cac79f482..63ab71455c5bcd1e6ca46b77ee9c939654ae3fcd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -683,6 +683,8 @@ static const char * const vmstat_text[] = { "slabs_scanned", "kswapd_steal", "kswapd_inodesteal", + "kswapd_slept_prematurely_fast", + "kswapd_slept_prematurely_slow", "pageoutrun", "allocstall",