diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2298122e71adae38b5f92d86f1bcc607043184ad..79fcf0cd71864d82114bb7a10d623db3d6276ebf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -453,6 +453,8 @@ void memcg_update_array_size(int num_groups); struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); +void mem_cgroup_destroy_cache(struct kmem_cache *cachep); + /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. diff --git a/include/linux/slab.h b/include/linux/slab.h index c0fcf28c15b20d05b0d93e0aeaedab6ece14c33a..869efb8d2377bfbed557a90b3da246d248055847 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -11,6 +11,8 @@ #include #include +#include <linux/workqueue.h> + /* * Flags to pass to kmem_cache_create(). @@ -179,7 +181,6 @@ void kmem_cache_free(struct kmem_cache *, void *); #ifndef ARCH_SLAB_MINALIGN #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif - /* * This is the main placeholder for memcg-related information in kmem caches. * struct kmem_cache will hold a pointer to it, so the memory cost while @@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *); * @memcg: pointer to the memcg this cache belongs to * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from + * @dead: set to true after the memcg dies; the cache may still be around. + * @nr_pages: number of pages that belong to this cache. + * @destroy: worker to be called whenever we are ready, or believe we may be + * ready, to destroy this cache. 
*/ struct memcg_cache_params { bool is_root_cache; @@ -206,6 +211,9 @@ struct memcg_cache_params { struct mem_cgroup *memcg; struct list_head list; struct kmem_cache *root_cache; + bool dead; + atomic_t nr_pages; + struct work_struct destroy; }; }; }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cc13797d0fbcf7472ac8b8b7970558e06ddab7c1..270a36789859a43059cb72e14701b4680575976c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2779,6 +2779,19 @@ static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg) (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK); } +/* + * This is a bit cumbersome, but it is rarely used and avoids a backpointer + * in the memcg_cache_params struct. + */ +static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) +{ + struct kmem_cache *cachep; + + VM_BUG_ON(p->is_root_cache); + cachep = p->root_cache; + return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)]; +} + static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) { struct res_counter *fail_res; @@ -3056,6 +3069,31 @@ static inline void memcg_resume_kmem_account(void) current->memcg_kmem_skip_account--; } +static void kmem_cache_destroy_work_func(struct work_struct *w) +{ + struct kmem_cache *cachep; + struct memcg_cache_params *p; + + p = container_of(w, struct memcg_cache_params, destroy); + + cachep = memcg_params_to_cache(p); + + if (!atomic_read(&cachep->memcg_params->nr_pages)) + kmem_cache_destroy(cachep); +} + +void mem_cgroup_destroy_cache(struct kmem_cache *cachep) +{ + if (!cachep->memcg_params->dead) + return; + + /* + * We have to defer the actual destroying to a workqueue, because + * we might currently be in a context that cannot sleep. 
+ */ + schedule_work(&cachep->memcg_params->destroy); +} + static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s) { char *name; @@ -3125,6 +3163,13 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, mem_cgroup_get(memcg); new_cachep->memcg_params->root_cache = cachep; + atomic_set(&new_cachep->memcg_params->nr_pages, 0); + /* + * Initialise the destroy work before the cache is published, so + * mem_cgroup_destroy_cache() can safely schedule it at any time. + */ + INIT_WORK(&new_cachep->memcg_params->destroy, + kmem_cache_destroy_work_func); cachep->memcg_params->memcg_caches[idx] = new_cachep; /* @@ -3143,6 +3188,23 @@ struct create_work { struct work_struct work; }; +static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) +{ + struct kmem_cache *cachep; + struct memcg_cache_params *params; + + if (!memcg_kmem_is_active(memcg)) + return; + + mutex_lock(&memcg->slab_caches_mutex); + list_for_each_entry(params, &memcg->memcg_slab_caches, list) { + cachep = memcg_params_to_cache(params); + cachep->memcg_params->dead = true; + schedule_work(&cachep->memcg_params->destroy); + } + mutex_unlock(&memcg->slab_caches_mutex); +} + static void memcg_create_cache_work_func(struct work_struct *w) { struct create_work *cw; @@ -3358,6 +3420,10 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) VM_BUG_ON(mem_cgroup_is_root(memcg)); memcg_uncharge_kmem(memcg, PAGE_SIZE << order); } +#else +static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) +{ +} #endif /* CONFIG_MEMCG_KMEM */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -5975,6 +6041,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont) struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); mem_cgroup_reparent_charges(memcg); + mem_cgroup_destroy_all_caches(memcg); } static void mem_cgroup_css_free(struct cgroup *cont) diff --git a/mm/slab.c b/mm/slab.c index e265865e8700bc2787e4c00483602116c85459ed..7467343f9fe7653456c4e6f469511ec63b3dfe0e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1895,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int 
nodeid) if (page->pfmemalloc) SetPageSlabPfmemalloc(page + i); } + memcg_bind_pages(cachep, cachep->gfporder); if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); @@ -1931,6 +1932,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) __ClearPageSlab(page); page++; } + + memcg_release_pages(cachep, cachep->gfporder); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); diff --git a/mm/slab.h b/mm/slab.h index c95e922b166db73c15fdb23f82468cb44100f197..43d8a38b534f18fc09c3c4973974ca4f21b00ec6 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -117,6 +117,21 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep, (cachep->memcg_params->memcg == memcg); } +static inline void memcg_bind_pages(struct kmem_cache *s, int order) +{ + if (!is_root_cache(s)) + atomic_add(1 << order, &s->memcg_params->nr_pages); +} + +static inline void memcg_release_pages(struct kmem_cache *s, int order) +{ + if (is_root_cache(s)) + return; + + if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages)) + mem_cgroup_destroy_cache(s); +} + static inline bool slab_equal_or_root(struct kmem_cache *s, struct kmem_cache *p) { @@ -135,6 +150,14 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep, return true; } +static inline void memcg_bind_pages(struct kmem_cache *s, int order) +{ +} + +static inline void memcg_release_pages(struct kmem_cache *s, int order) +{ +} + static inline bool slab_equal_or_root(struct kmem_cache *s, struct kmem_cache *p) { diff --git a/mm/slub.c b/mm/slub.c index ef39e872b8eb665c6217bf8dd09c14210ef5381a..692177bebdf05ef001b10e12670d5de143e0e094 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1344,6 +1344,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) void *start; void *last; void *p; + int order; BUG_ON(flags & GFP_SLAB_BUG_MASK); @@ -1352,7 +1353,9 
@@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) goto out; + order = compound_order(page); inc_slabs_node(s, page_to_nid(page), page->objects); + memcg_bind_pages(s, order); page->slab_cache = s; __SetPageSlab(page); if (page->pfmemalloc) @@ -1361,7 +1364,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) start = page_address(page); if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page)); + memset(start, POISON_INUSE, PAGE_SIZE << order); last = start; for_each_object(p, s, start, page->objects) { @@ -1402,6 +1405,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); + + memcg_release_pages(s, order); reset_page_mapcount(page); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages;