diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bae6cf6402629c5bfc16febad04ccee600b70920..2fd358cef3aef997694fc7218cfc04b04197c6d9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -704,6 +715,8 @@ struct cgroup; static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e971097491e32d1e08dc5ff0e5f01ed8d3aefef5..452a0c22cf66c630ea8b0163aba10dc5fc29ac93 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -112,7 +112,15 @@ static __always_inline enum lru_list page_lru(struct page *page) static inline bool lru_gen_enabled(void) { - return true; +#ifdef CONFIG_LRU_GEN_ENABLED + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +#else + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +#endif } static inline bool lru_gen_in_fault(void) @@ -202,7 +210,7 @@ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bo int zone = page_zonenum(page); struct lru_gen_struct *lrugen = &lruvec->lrugen; - if (PageUnevictable(page)) + if (PageUnevictable(page) || !lrugen->enabled) return false; /* * There are three common cases for this page: diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 931aae8b614011f91f40acfe850971a2e6fb0419..c29bd2966da7cad6b8e6a0d5234f5d402c85dfb1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -346,6 +346,13 @@ enum { LRU_GEN_FILE, }; +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) @@ -384,6 +391,8 @@ struct lru_gen_struct { /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; }; enum { diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 3f116765bb001ef612585f57584abccb3ed6e1b3..7b0842630257b7cd1f3c8193428c72ec3a6aaa5f 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -165,7 +165,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; diff --git a/mm/Kconfig b/mm/Kconfig index 57aa760050f01aac7687eff9bd7703d7d260cc3d..746d229ff7092725712c398d9de0e76c56fed4b0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -994,6 +994,12 @@ config LRU_GEN help A high performance LRU implementation to overcommit memory. +config LRU_GEN_ENABLED + bool "Enable by default" + depends on LRU_GEN + help + This option enables the multi-gen LRU by default. + config LRU_GEN_STATS bool "Full stats for debugging" depends on LRU_GEN diff --git a/mm/vmscan.c b/mm/vmscan.c index d79f4c3922540087a73b69b4f8587f900cfb8a63..8609c94576b1493dd104eb5cde14f5b4149c62e0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -2763,6 +2764,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, #ifdef CONFIG_LRU_GEN +#ifdef CONFIG_LRU_GEN_ENABLED +DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); +#else +DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); +#endif + /****************************************************************************** * shorthand helpers ******************************************************************************/ @@ -2799,6 +2806,15 @@ static int page_lru_tier(struct page *page) return lru_tier_from_refs(refs); } +static bool get_cap(int cap) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + return static_branch_likely(&lru_gen_caps[cap]); +#else + return static_branch_unlikely(&lru_gen_caps[cap]); +#endif +} + static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) { struct pglist_data *pgdat = NODE_DATA(nid); @@ -3604,7 +3620,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)) + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; } @@ -3711,10 +3728,12 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, priv->mm_stats[MM_PMD_TOTAL]++; #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (!pmd_young(val)) - continue; + if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (!pmd_young(val)) + continue; - walk_pmd_range_locked(pud, addr, vma, walk, &pos); + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + } #endif if (!priv->full_scan && !test_bloom_filter(priv->lruvec, priv->max_seq, pmd + i)) continue; @@ -3955,7 +3974,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, * handful of PTEs. Spreading the work out over a period of time usually * is less efficient, but it avoids bursty page faults. */ - if (!full_scan && !arch_has_hw_pte_young()) { + if (!full_scan && (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK))) { success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; } @@ -4680,6 +4699,223 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc blk_finish_plug(&plug); } +/****************************************************************************** + * state change + ******************************************************************************/ + +static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (lrugen->enabled) { + enum lru_list lru; + + for_each_evictable_lru(lru) { + if (!list_empty(&lruvec->lists[lru])) + return false; + } + } else { + int gen, type, zone; + + for_each_gen_type_zone(gen, type, zone) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + return false; + + /* unlikely but not a bug when reset_batch_size() is pending */ + VM_WARN_ON(lrugen->nr_pages[gen][type][zone]); + } + } + + return true; +} + +static bool fill_evictable(struct lruvec *lruvec) +{ + enum lru_list lru; + int remaining = MAX_LRU_BATCH; + + for_each_evictable_lru(lru) { + int type = is_file_lru(lru); + bool active = is_active_lru(lru); + struct list_head *head = &lruvec->lists[lru]; + + while (!list_empty(head)) { + bool success; + struct page *page = lru_to_page(head); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageActive(page) != active, page); + VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); + VM_BUG_ON_PAGE(page_lru_gen(page) < MAX_NR_GENS, page); + + prefetchw_prev_lru_page(page, head, flags); + + del_page_from_lru_list(page, lruvec, page_lru(page)); + success = lru_gen_add_page(lruvec, page, false); + VM_BUG_ON(!success); + + if (!--remaining) + return false; + } + } + + return true; +} + +static bool drain_evictable(struct lruvec *lruvec) +{ + int gen, type, zone; + int remaining = MAX_LRU_BATCH; + + for_each_gen_type_zone(gen, type, zone) { + struct list_head *head = &lruvec->lrugen.lists[gen][type][zone]; + + while (!list_empty(head)) { + bool success; + struct page *page = lru_to_page(head); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); + VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); + + prefetchw_prev_lru_page(page, head, flags); + + success = lru_gen_del_page(lruvec, page, false); + VM_BUG_ON(!success); + add_page_to_lru_list(page, lruvec, page_lru(page)); + + if (!--remaining) + return false; + } + } + + return true; +} + +static void lru_gen_change_state(bool enable) +{ + static DEFINE_MUTEX(state_mutex); + + struct mem_cgroup *memcg; + + cgroup_lock(); + cpus_read_lock(); + get_online_mems(); + mutex_lock(&state_mutex); + + if (enable == lru_gen_enabled()) + goto unlock; + + if (enable) + static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + else + static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node(nid) { + struct pglist_data *pgdat = NODE_DATA(nid); + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (!pgdat) { + lruvec->lrugen.enabled = enable; + continue; + } + + spin_lock_irq(&lruvec->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + VM_BUG_ON(!state_is_valid(lruvec)); + + lruvec->lrugen.enabled = enable; + + while (!(enable ? fill_evictable(lruvec) : drain_evictable(lruvec))) { + spin_unlock_irq(&lruvec->lru_lock); + cond_resched(); + spin_lock_irq(&lruvec->lru_lock); + } + + spin_unlock_irq(&lruvec->lru_lock); + } + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); +unlock: + mutex_unlock(&state_mutex); + put_online_mems(); + cpus_read_unlock(); + cgroup_unlock(); +} + +/****************************************************************************** + * sysfs interface + ******************************************************************************/ + +static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned int caps = 0; + + if (get_cap(LRU_GEN_CORE)) + caps |= BIT(LRU_GEN_CORE); + + if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) + caps |= BIT(LRU_GEN_MM_WALK); + + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + caps |= BIT(LRU_GEN_NONLEAF_YOUNG); + + return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); +} + +static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int i; + unsigned int caps; + + if (tolower(*buf) == 'n') + caps = 0; + else if (tolower(*buf) == 'y') + caps = -1; + else if (kstrtouint(buf, 0, &caps)) + return -EINVAL; + + for (i = 0; i < NR_LRU_GEN_CAPS; i++) { + bool enable = caps & BIT(i); + + if (i == LRU_GEN_CORE) + lru_gen_change_state(enable); + else if (enable) + static_branch_enable(&lru_gen_caps[i]); + else + static_branch_disable(&lru_gen_caps[i]); + } + + return len; +} + +static struct kobj_attribute lru_gen_enabled_attr = __ATTR( + enabled, 0644, show_enable, store_enable +); + +static struct attribute *lru_gen_attrs[] = { + &lru_gen_enabled_attr.attr, + NULL +}; + +static struct attribute_group lru_gen_attr_group = { + .name = "lru_gen", + .attrs = lru_gen_attrs, +}; + /****************************************************************************** * initialization ******************************************************************************/ @@ -4690,6 +4926,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) struct lru_gen_struct *lrugen = &lruvec->lrugen; lrugen->max_seq = MIN_NR_GENS + 1; + lrugen->enabled = lru_gen_enabled(); for_each_gen_type_zone(gen, type, zone) INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); @@ -4730,6 +4967,9 @@ static int __init init_lru_gen(void) BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); + if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) + pr_err("lru_gen: failed to create sysfs group\n"); + return 0; }; late_initcall(init_lru_gen);