diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 77227224ca880d10fb264d42fa2ad0ca5c1189b5..631716d70644f74307f41c6f84b5fdf7ec25e15f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -244,6 +244,15 @@ static inline bool thp_migration_supported(void) return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } +static inline struct list_head *page_deferred_list(struct page *page) +{ + /* + * Global or memcg deferred list in the second tail pages is + * occupied by compound_head. + */ + return &page[2].deferred_list; +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c7709d677b2477ca99b7b1f55493e6805e1bbc6a..992e9139497fc9ade0feab5ce7e85b3011d84a5f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -325,6 +325,10 @@ struct mem_cgroup { struct idle_page_stats idle_stats[KIDLED_STATS_NR_TYPE]; #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct deferred_split deferred_split_queue; +#endif + struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ }; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3a9a996af229eaefafb78c1392514cb25e7ab56b..971d01c1c196b7169b67d7922c5215d095a1f82d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -133,6 +133,7 @@ struct page { struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ unsigned long _compound_pad_2; + /* For both global and memcg */ struct list_head deferred_list; }; struct { /* Page table pages */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 47ae0b7e0a775dc6f277d060bc8642f945e1ef0c..f5f0de66e49c87c363a08f6aa31354e4b8dbc399 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -482,11 +482,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline struct list_head *page_deferred_list(struct page *page) +#ifdef CONFIG_MEMCG +static inline struct deferred_split *get_deferred_split_queue(struct page *page) { - /* ->lru in the tail pages is occupied by compound_head. */ - return &page[2].deferred_list; + struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + + if (memcg) + return &memcg->deferred_split_queue; + else + return &pgdat->deferred_split_queue; } +#else +static inline struct deferred_split *get_deferred_split_queue(struct page *page) +{ + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + + return &pgdat->deferred_split_queue; +} +#endif void prep_transhuge_page(struct page *page) { @@ -2635,7 +2649,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); - struct deferred_split *ds_queue = &pgdata->deferred_split_queue; + struct deferred_split *ds_queue = get_deferred_split_queue(page); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int count, mapcount, extra_pins, ret; @@ -2774,8 +2788,7 @@ fail: if (mapping) void free_transhuge_page(struct page *page) { - struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); - struct deferred_split *ds_queue = &pgdata->deferred_split_queue; + struct deferred_split *ds_queue = get_deferred_split_queue(page); unsigned long flags; spin_lock_irqsave(&ds_queue->split_queue_lock, flags); @@ -2789,17 +2802,37 @@ void free_transhuge_page(struct page *page) void deferred_split_huge_page(struct page *page) { - struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); - struct deferred_split *ds_queue = &pgdata->deferred_split_queue; + struct deferred_split *ds_queue = get_deferred_split_queue(page); +#ifdef CONFIG_MEMCG + struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; +#endif unsigned long flags; VM_BUG_ON_PAGE(!PageTransHuge(page), page); + /* + * The try_to_unmap() in page reclaim path might reach here too, + * this may cause a race condition to corrupt deferred split queue. + * And, if page reclaim is already handling the same page, it is + * unnecessary to handle it again in shrinker. + * + * Check PageSwapCache to determine if the page is being + * handled by page reclaim since THP swap would add the page into + * swap cache before calling try_to_unmap(). + */ + if (PageSwapCache(page)) + return; + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (list_empty(page_deferred_list(page))) { count_vm_event(THP_DEFERRED_SPLIT_PAGE); list_add_tail(page_deferred_list(page), &ds_queue->split_queue); ds_queue->split_queue_len++; +#ifdef CONFIG_MEMCG + if (memcg) + memcg_set_shrinker_bit(memcg, page_to_nid(page), + deferred_split_shrinker.id); +#endif } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } @@ -2809,6 +2842,11 @@ static unsigned long deferred_split_count(struct shrinker *shrink, { struct pglist_data *pgdata = NODE_DATA(sc->nid); struct deferred_split *ds_queue = &pgdata->deferred_split_queue; + +#ifdef CONFIG_MEMCG + if (sc->memcg) + ds_queue = &sc->memcg->deferred_split_queue; +#endif return READ_ONCE(ds_queue->split_queue_len); } @@ -2822,6 +2860,11 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, struct page *page; int split = 0; +#ifdef CONFIG_MEMCG + if (sc->memcg) + ds_queue = &sc->memcg->deferred_split_queue; +#endif + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); /* Take pin on all head pages to avoid freeing them under us */ list_for_each_safe(pos, next, &ds_queue->split_queue) { @@ -2868,7 +2911,8 @@ static struct shrinker deferred_split_shrinker = { .count_objects = deferred_split_count, .scan_objects = deferred_split_scan, .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, + .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE | + SHRINKER_NONSLAB, }; #ifdef CONFIG_DEBUG_FS diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5c510b5208874dad37aaf65c5e5002bff7bf0edd..4c52f6c7146e9fcdb639343916defeb457328bd6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5084,6 +5084,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void) INIT_LIST_HEAD(&memcg->cgwb_list); #endif kidled_memcg_init(memcg); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + spin_lock_init(&memcg->deferred_split_queue.split_queue_lock); + INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue); + memcg->deferred_split_queue.split_queue_len = 0; +#endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: @@ -5465,6 +5470,14 @@ static int mem_cgroup_move_account(struct page *page, __mod_memcg_state(to, NR_WRITEBACK, nr_pages); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (compound && !list_empty(page_deferred_list(page))) { + spin_lock(&from->deferred_split_queue.split_queue_lock); + list_del_init(page_deferred_list(page)); + from->deferred_split_queue.split_queue_len--; + spin_unlock(&from->deferred_split_queue.split_queue_lock); + } +#endif /* * It is safe to change page->mem_cgroup here because the page * is referenced, charged, and isolated - we can't race with @@ -5473,6 +5486,17 @@ static int mem_cgroup_move_account(struct page *page, /* caller should have done css_get */ page->mem_cgroup = to; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (compound && list_empty(page_deferred_list(page))) { + spin_lock(&to->deferred_split_queue.split_queue_lock); + list_add_tail(page_deferred_list(page), + &to->deferred_split_queue.split_queue); + to->deferred_split_queue.split_queue_len++; + spin_unlock(&to->deferred_split_queue.split_queue_lock); + } +#endif + spin_unlock_irqrestore(&from->move_lock, flags); ret = 0;