diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 18ccb2988979cf61c715d1e3c9477d57d418f9c9..fb212e1d700d29b42036e448023bc70b2c95227d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -407,8 +407,6 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, unsigned long nr_pages); void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); -int __memcg_cleanup_cache_params(struct kmem_cache *s); - /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. * @gfp: the gfp allocation flags. diff --git a/include/linux/slab.h b/include/linux/slab.h index eca9ed303a1b2bdf5822505ca47a25f9b6d6fb4f..2e3b448cfa2d8bb922d8c1a8a2215862926b1c54 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,8 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); #ifdef CONFIG_MEMCG_KMEM -struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, - struct kmem_cache *); +void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *); +void memcg_destroy_kmem_caches(struct mem_cgroup *); #endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -490,7 +490,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * Child caches will hold extra metadata needed for its operation. Fields are: * * @memcg: pointer to the memcg this cache belongs to - * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from */ struct memcg_cache_params { @@ -502,7 +501,6 @@ struct memcg_cache_params { }; struct { struct mem_cgroup *memcg; - struct list_head list; struct kmem_cache *root_cache; }; }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index baf7eb27e3aeef239db28fa73d641ba935737ce6..f3f8a4f52a0ca31f0731663f0240e856651c5cc4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -343,9 +343,6 @@ struct mem_cgroup { struct cg_proto tcp_mem; #endif #if defined(CONFIG_MEMCG_KMEM) - /* analogous to slab_common's slab_caches list, but per-memcg; - * protected by memcg_slab_mutex */ - struct list_head memcg_slab_caches; /* Index in the kmem_cache->memcg_params->memcg_caches array */ int kmemcg_id; #endif @@ -2476,25 +2473,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM -/* - * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or - * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists. - */ -static DEFINE_MUTEX(memcg_slab_mutex); - -/* - * This is a bit cumbersome, but it is rarely used and avoids a backpointer - * in the memcg_cache_params struct. - */ -static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) -{ - struct kmem_cache *cachep; - - VM_BUG_ON(p->is_root_cache); - cachep = p->root_cache; - return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg)); -} - int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, unsigned long nr_pages) { @@ -2578,10 +2556,7 @@ static int memcg_alloc_cache_id(void) else if (size > MEMCG_CACHES_MAX_SIZE) size = MEMCG_CACHES_MAX_SIZE; - mutex_lock(&memcg_slab_mutex); err = memcg_update_all_caches(size); - mutex_unlock(&memcg_slab_mutex); - if (err) { ida_simple_remove(&kmem_limited_groups, id); return err; @@ -2604,120 +2579,20 @@ void memcg_update_array_size(int num) memcg_limited_groups_array_size = num; } -static void memcg_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) -{ - struct kmem_cache *cachep; - int id; - - lockdep_assert_held(&memcg_slab_mutex); - - id = memcg_cache_id(memcg); - - /* - * Since per-memcg caches are created asynchronously on first - * allocation (see memcg_kmem_get_cache()), several threads can try to - * create the same cache, but only one of them may succeed. - */ - if (cache_from_memcg_idx(root_cache, id)) - return; - - cachep = memcg_create_kmem_cache(memcg, root_cache); - /* - * If we could not create a memcg cache, do not complain, because - * that's not critical at all as we can always proceed with the root - * cache. - */ - if (!cachep) - return; - - list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); - - /* - * Since readers won't lock (see cache_from_memcg_idx()), we need a - * barrier here to ensure nobody will see the kmem_cache partially - * initialized. - */ - smp_wmb(); - - BUG_ON(root_cache->memcg_params->memcg_caches[id]); - root_cache->memcg_params->memcg_caches[id] = cachep; -} - -static void memcg_unregister_cache(struct kmem_cache *cachep) -{ - struct kmem_cache *root_cache; - struct mem_cgroup *memcg; - int id; - - lockdep_assert_held(&memcg_slab_mutex); - - BUG_ON(is_root_cache(cachep)); - - root_cache = cachep->memcg_params->root_cache; - memcg = cachep->memcg_params->memcg; - id = memcg_cache_id(memcg); - - BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep); - root_cache->memcg_params->memcg_caches[id] = NULL; - - list_del(&cachep->memcg_params->list); - - kmem_cache_destroy(cachep); -} - -int __memcg_cleanup_cache_params(struct kmem_cache *s) -{ - struct kmem_cache *c; - int i, failed = 0; - - mutex_lock(&memcg_slab_mutex); - for_each_memcg_cache_index(i) { - c = cache_from_memcg_idx(s, i); - if (!c) - continue; - - memcg_unregister_cache(c); - - if (cache_from_memcg_idx(s, i)) - failed++; - } - mutex_unlock(&memcg_slab_mutex); - return failed; -} - -static void memcg_unregister_all_caches(struct mem_cgroup *memcg) -{ - struct kmem_cache *cachep; - struct memcg_cache_params *params, *tmp; - - if (!memcg_kmem_is_active(memcg)) - return; - - mutex_lock(&memcg_slab_mutex); - list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) { - cachep = memcg_params_to_cache(params); - memcg_unregister_cache(cachep); - } - mutex_unlock(&memcg_slab_mutex); -} - -struct memcg_register_cache_work { +struct memcg_kmem_cache_create_work { struct mem_cgroup *memcg; struct kmem_cache *cachep; struct work_struct work; }; -static void memcg_register_cache_func(struct work_struct *w) +static void memcg_kmem_cache_create_func(struct work_struct *w) { - struct memcg_register_cache_work *cw = - container_of(w, struct memcg_register_cache_work, work); + struct memcg_kmem_cache_create_work *cw = + container_of(w, struct memcg_kmem_cache_create_work, work); struct mem_cgroup *memcg = cw->memcg; struct kmem_cache *cachep = cw->cachep; - mutex_lock(&memcg_slab_mutex); - memcg_register_cache(memcg, cachep); - mutex_unlock(&memcg_slab_mutex); + memcg_create_kmem_cache(memcg, cachep); css_put(&memcg->css); kfree(cw); @@ -2726,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w) /* * Enqueue the creation of a per-memcg kmem_cache. */ -static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *cachep) +static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, + struct kmem_cache *cachep) { - struct memcg_register_cache_work *cw; + struct memcg_kmem_cache_create_work *cw; cw = kmalloc(sizeof(*cw), GFP_NOWAIT); if (!cw) @@ -2739,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, cw->memcg = memcg; cw->cachep = cachep; + INIT_WORK(&cw->work, memcg_kmem_cache_create_func); - INIT_WORK(&cw->work, memcg_register_cache_func); schedule_work(&cw->work); } -static void memcg_schedule_register_cache(struct mem_cgroup *memcg, - struct kmem_cache *cachep) +static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, + struct kmem_cache *cachep) { /* * We need to stop accounting when we kmalloc, because if the * corresponding kmalloc cache is not yet created, the first allocation - * in __memcg_schedule_register_cache will recurse. + * in __memcg_schedule_kmem_cache_create will recurse. * * However, it is better to enclose the whole function. Depending on * the debugging options enabled, INIT_WORK(), for instance, can @@ -2759,7 +2634,7 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg, * the safest choice is to do it like this, wrapping the whole function. */ current->memcg_kmem_skip_account = 1; - __memcg_schedule_register_cache(memcg, cachep); + __memcg_schedule_kmem_cache_create(memcg, cachep); current->memcg_kmem_skip_account = 0; } @@ -2807,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) * could happen with the slab_mutex held. So it's better to * defer everything. */ - memcg_schedule_register_cache(memcg, cachep); + memcg_schedule_kmem_cache_create(memcg, cachep); out: css_put(&memcg->css); return cachep; @@ -4136,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) static void memcg_destroy_kmem(struct mem_cgroup *memcg) { - memcg_unregister_all_caches(memcg); + memcg_destroy_kmem_caches(memcg); mem_cgroup_sockets_destroy(memcg); } #else @@ -4664,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) spin_lock_init(&memcg->event_list_lock); #ifdef CONFIG_MEMCG_KMEM memcg->kmemcg_id = -1; - INIT_LIST_HEAD(&memcg->memcg_slab_caches); #endif return &memcg->css; diff --git a/mm/slab_common.c b/mm/slab_common.c index 1b782a2d3b3d1a7006fcc027fc7895767a04c51f..6e1e4cf65836cbc5eff3570bd19278a420ec3185 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -425,6 +425,49 @@ kmem_cache_create(const char *name, size_t size, size_t align, } EXPORT_SYMBOL(kmem_cache_create); +static int do_kmem_cache_shutdown(struct kmem_cache *s, + struct list_head *release, bool *need_rcu_barrier) +{ + if (__kmem_cache_shutdown(s) != 0) { + printk(KERN_ERR "kmem_cache_destroy %s: " + "Slab cache still has objects\n", s->name); + dump_stack(); + return -EBUSY; + } + + if (s->flags & SLAB_DESTROY_BY_RCU) + *need_rcu_barrier = true; + +#ifdef CONFIG_MEMCG_KMEM + if (!is_root_cache(s)) { + struct kmem_cache *root_cache = s->memcg_params->root_cache; + int memcg_id = memcg_cache_id(s->memcg_params->memcg); + + BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s); + root_cache->memcg_params->memcg_caches[memcg_id] = NULL; + } +#endif + list_move(&s->list, release); + return 0; +} + +static void do_kmem_cache_release(struct list_head *release, + bool need_rcu_barrier) +{ + struct kmem_cache *s, *s2; + + if (need_rcu_barrier) + rcu_barrier(); + + list_for_each_entry_safe(s, s2, release, list) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_remove(s); +#else + slab_kmem_cache_release(s); +#endif + } +} + #ifdef CONFIG_MEMCG_KMEM /* * memcg_create_kmem_cache - Create a cache for a memory cgroup. @@ -435,10 +478,11 @@ EXPORT_SYMBOL(kmem_cache_create); * requests going from @memcg to @root_cache. The new cache inherits properties * from its parent. */ -struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) +void memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) { static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */ + int memcg_id = memcg_cache_id(memcg); struct kmem_cache *s = NULL; char *cache_name; @@ -447,6 +491,14 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, mutex_lock(&slab_mutex); + /* + * Since per-memcg caches are created asynchronously on first + * allocation (see memcg_kmem_get_cache()), several threads can try to + * create the same cache, but only one of them may succeed. + */ + if (cache_from_memcg_idx(root_cache, memcg_id)) + goto out_unlock; + cgroup_name(mem_cgroup_css(memcg)->cgroup, memcg_name_buf, sizeof(memcg_name_buf)); cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, @@ -458,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, root_cache->size, root_cache->align, root_cache->flags, root_cache->ctor, memcg, root_cache); + /* + * If we could not create a memcg cache, do not complain, because + * that's not critical at all as we can always proceed with the root + * cache. + */ if (IS_ERR(s)) { kfree(cache_name); - s = NULL; + goto out_unlock; } + /* + * Since readers won't lock (see cache_from_memcg_idx()), we need a + * barrier here to ensure nobody will see the kmem_cache partially + * initialized. + */ + smp_wmb(); + root_cache->memcg_params->memcg_caches[memcg_id] = s; + out_unlock: mutex_unlock(&slab_mutex); put_online_mems(); put_online_cpus(); - - return s; } -static int memcg_cleanup_cache_params(struct kmem_cache *s) +void memcg_destroy_kmem_caches(struct mem_cgroup *memcg) { - int rc; + LIST_HEAD(release); + bool need_rcu_barrier = false; + struct kmem_cache *s, *s2; - if (!s->memcg_params || - !s->memcg_params->is_root_cache) - return 0; + get_online_cpus(); + get_online_mems(); - mutex_unlock(&slab_mutex); - rc = __memcg_cleanup_cache_params(s); mutex_lock(&slab_mutex); + list_for_each_entry_safe(s, s2, &slab_caches, list) { + if (is_root_cache(s) || s->memcg_params->memcg != memcg) + continue; + /* + * The cgroup is about to be freed and therefore has no charges + * left. Hence, all its caches must be empty by now. + */ + BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier)); + } + mutex_unlock(&slab_mutex); - return rc; -} -#else -static int memcg_cleanup_cache_params(struct kmem_cache *s) -{ - return 0; + put_online_mems(); + put_online_cpus(); + + do_kmem_cache_release(&release, need_rcu_barrier); } #endif /* CONFIG_MEMCG_KMEM */ void slab_kmem_cache_release(struct kmem_cache *s) { + memcg_free_cache_params(s); kfree(s->name); kmem_cache_free(kmem_cache, s); } void kmem_cache_destroy(struct kmem_cache *s) { + int i; + LIST_HEAD(release); + bool need_rcu_barrier = false; + bool busy = false; + get_online_cpus(); get_online_mems(); @@ -510,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->refcount) goto out_unlock; - if (memcg_cleanup_cache_params(s) != 0) - goto out_unlock; + for_each_memcg_cache_index(i) { + struct kmem_cache *c = cache_from_memcg_idx(s, i); - if (__kmem_cache_shutdown(s) != 0) { - printk(KERN_ERR "kmem_cache_destroy %s: " - "Slab cache still has objects\n", s->name); - dump_stack(); - goto out_unlock; + if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier)) + busy = true; } - list_del(&s->list); - - mutex_unlock(&slab_mutex); - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); - - memcg_free_cache_params(s); -#ifdef SLAB_SUPPORTS_SYSFS - sysfs_slab_remove(s); -#else - slab_kmem_cache_release(s); -#endif - goto out; + if (!busy) + do_kmem_cache_shutdown(s, &release, &need_rcu_barrier); out_unlock: mutex_unlock(&slab_mutex); -out: + put_online_mems(); put_online_cpus(); + + do_kmem_cache_release(&release, need_rcu_barrier); } EXPORT_SYMBOL(kmem_cache_destroy);