提交 03afc0e2 编写于 作者: Vladimir Davydov 提交者: Linus Torvalds

slab: get_online_mems for kmem_cache_{create,destroy,shrink}

When we create a sl[au]b cache, we allocate kmem_cache_node structures
for each online NUMA node.  To handle nodes taken online/offline, we
register a memory hotplug notifier and allocate/free the kmem_cache_node
corresponding to the node that changes its state for each kmem cache.

To synchronize between the two paths we hold the slab_mutex during both
the cache creation/destruction path and while tuning per-node parts of
kmem caches in memory hotplug handler, but that's not quite right,
because it does not guarantee that a newly created cache will have all
kmem_cache_nodes initialized in case it races with memory hotplug.  For
instance, in case of slub:

    CPU0                            CPU1
    ----                            ----
    kmem_cache_create:              online_pages:
     __kmem_cache_create:            slab_memory_callback:
                                      slab_mem_going_online_callback:
                                       lock slab_mutex
                                       for each slab_caches list entry
                                           allocate kmem_cache node
                                       unlock slab_mutex
      lock slab_mutex
      init_kmem_cache_nodes:
       for_each_node_state(node, N_NORMAL_MEMORY)
           allocate kmem_cache node
      add kmem_cache to slab_caches list
      unlock slab_mutex
                                    online_pages (continued):
                                     node_states_set_node

As a result we'll get a kmem cache that does not have all of its
kmem_cache_nodes allocated.

To avoid issues like that we should hold get/put_online_mems() during
the whole kmem cache creation/destruction/shrink paths, just like we
deal with cpu hotplug.  This patch does the trick.

Note that after it's applied, there is no need to take the slab_mutex
for kmem_cache_shrink any more, so it is removed from there.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 bfc8c901
...@@ -2480,8 +2480,7 @@ static int drain_freelist(struct kmem_cache *cache, ...@@ -2480,8 +2480,7 @@ static int drain_freelist(struct kmem_cache *cache,
return nr_freed; return nr_freed;
} }
/* Called with slab_mutex held to protect against cpu hotplug */ int __kmem_cache_shrink(struct kmem_cache *cachep)
static int __cache_shrink(struct kmem_cache *cachep)
{ {
int ret = 0, i = 0; int ret = 0, i = 0;
struct kmem_cache_node *n; struct kmem_cache_node *n;
...@@ -2502,32 +2501,11 @@ static int __cache_shrink(struct kmem_cache *cachep) ...@@ -2502,32 +2501,11 @@ static int __cache_shrink(struct kmem_cache *cachep)
return (ret ? 1 : 0); return (ret ? 1 : 0);
} }
/**
* kmem_cache_shrink - Shrink a cache.
* @cachep: The cache to shrink.
*
* Releases as many slabs as possible for a cache.
* To help debugging, a zero exit status indicates all slabs were released.
*/
int kmem_cache_shrink(struct kmem_cache *cachep)
{
int ret;
BUG_ON(!cachep || in_interrupt());
get_online_cpus();
mutex_lock(&slab_mutex);
ret = __cache_shrink(cachep);
mutex_unlock(&slab_mutex);
put_online_cpus();
return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
int __kmem_cache_shutdown(struct kmem_cache *cachep) int __kmem_cache_shutdown(struct kmem_cache *cachep)
{ {
int i; int i;
struct kmem_cache_node *n; struct kmem_cache_node *n;
int rc = __cache_shrink(cachep); int rc = __kmem_cache_shrink(cachep);
if (rc) if (rc)
return rc; return rc;
......
...@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align, ...@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
int __kmem_cache_shutdown(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *); void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file; struct seq_file;
......
...@@ -205,6 +205,8 @@ kmem_cache_create(const char *name, size_t size, size_t align, ...@@ -205,6 +205,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
int err; int err;
get_online_cpus(); get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
err = kmem_cache_sanity_check(name, size); err = kmem_cache_sanity_check(name, size);
...@@ -239,6 +241,8 @@ kmem_cache_create(const char *name, size_t size, size_t align, ...@@ -239,6 +241,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
out_unlock: out_unlock:
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus(); put_online_cpus();
if (err) { if (err) {
...@@ -272,6 +276,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c ...@@ -272,6 +276,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
char *cache_name; char *cache_name;
get_online_cpus(); get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
/* /*
...@@ -295,6 +301,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c ...@@ -295,6 +301,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
out_unlock: out_unlock:
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
put_online_mems();
put_online_cpus(); put_online_cpus();
} }
...@@ -328,6 +336,8 @@ void slab_kmem_cache_release(struct kmem_cache *s) ...@@ -328,6 +336,8 @@ void slab_kmem_cache_release(struct kmem_cache *s)
void kmem_cache_destroy(struct kmem_cache *s) void kmem_cache_destroy(struct kmem_cache *s)
{ {
get_online_cpus(); get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
s->refcount--; s->refcount--;
...@@ -359,15 +369,36 @@ void kmem_cache_destroy(struct kmem_cache *s) ...@@ -359,15 +369,36 @@ void kmem_cache_destroy(struct kmem_cache *s)
#else #else
slab_kmem_cache_release(s); slab_kmem_cache_release(s);
#endif #endif
goto out_put_cpus; goto out;
out_unlock: out_unlock:
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
out_put_cpus: out:
put_online_mems();
put_online_cpus(); put_online_cpus();
} }
EXPORT_SYMBOL(kmem_cache_destroy); EXPORT_SYMBOL(kmem_cache_destroy);
/**
* kmem_cache_shrink - Shrink a cache.
* @cachep: The cache to shrink.
*
* Releases as many slabs as possible for a cache.
* To help debugging, a zero exit status indicates all slabs were released.
*/
int kmem_cache_shrink(struct kmem_cache *cachep)
{
int ret;
get_online_cpus();
get_online_mems();
ret = __kmem_cache_shrink(cachep);
put_online_mems();
put_online_cpus();
return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
int slab_is_available(void) int slab_is_available(void)
{ {
return slab_state >= UP; return slab_state >= UP;
......
...@@ -620,11 +620,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c) ...@@ -620,11 +620,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
return 0; return 0;
} }
int kmem_cache_shrink(struct kmem_cache *d) int __kmem_cache_shrink(struct kmem_cache *d)
{ {
return 0; return 0;
} }
EXPORT_SYMBOL(kmem_cache_shrink);
struct kmem_cache kmem_cache_boot = { struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache", .name = "kmem_cache",
......
...@@ -3398,7 +3398,7 @@ EXPORT_SYMBOL(kfree); ...@@ -3398,7 +3398,7 @@ EXPORT_SYMBOL(kfree);
* being allocated from last increasing the chance that the last objects * being allocated from last increasing the chance that the last objects
* are freed in them. * are freed in them.
*/ */
int kmem_cache_shrink(struct kmem_cache *s) int __kmem_cache_shrink(struct kmem_cache *s)
{ {
int node; int node;
int i; int i;
...@@ -3454,7 +3454,6 @@ int kmem_cache_shrink(struct kmem_cache *s) ...@@ -3454,7 +3454,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
kfree(slabs_by_inuse); kfree(slabs_by_inuse);
return 0; return 0;
} }
EXPORT_SYMBOL(kmem_cache_shrink);
static int slab_mem_going_offline_callback(void *arg) static int slab_mem_going_offline_callback(void *arg)
{ {
...@@ -3462,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg) ...@@ -3462,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg)
mutex_lock(&slab_mutex); mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) list_for_each_entry(s, &slab_caches, list)
kmem_cache_shrink(s); __kmem_cache_shrink(s);
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
return 0; return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册