提交 cedfb2db 编写于 作者: L Linus Torvalds

Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slub: Use alloc_pages_exact_node() for page allocation
  slub: __kmalloc_node_track_caller should trace kmalloc_large_node case
  slub: Potential stack overflow
  crypto: Use ARCH_KMALLOC_MINALIGN for CRYPTO_MINALIGN now that it's exposed
  mm: Move ARCH_SLAB_MINALIGN and ARCH_KMALLOC_MINALIGN to <linux/slub_def.h>
  mm: Move ARCH_SLAB_MINALIGN and ARCH_KMALLOC_MINALIGN to <linux/slob_def.h>
  mm: Move ARCH_SLAB_MINALIGN and ARCH_KMALLOC_MINALIGN to <linux/slab_def.h>
  slab: Fix missing DEBUG_SLAB last user
  slab: add memory hotplug support
  slab: Fix continuation lines
......@@ -99,13 +99,7 @@
* as arm where pointers are 32-bit aligned but there are data types such as
* u64 which require 64-bit alignment.
*/
#if defined(ARCH_KMALLOC_MINALIGN)
#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
#elif defined(ARCH_SLAB_MINALIGN)
#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN
#else
#define CRYPTO_MINALIGN __alignof__(unsigned long long)
#endif
#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
......
......@@ -16,6 +16,30 @@
#include <linux/compiler.h>
#include <linux/kmemtrace.h>
#ifndef ARCH_KMALLOC_MINALIGN
/*
* Enforce a minimum alignment for the kmalloc caches.
* Usually, the kmalloc caches are cache_line_size() aligned, except when
* DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* ARCH_KMALLOC_MINALIGN allows that.
* Note that increasing this value may disable some debug features.
*/
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
/*
* Enforce a minimum alignment for all caches.
* Intended for archs that get misalignment faults even for BYTES_PER_WORD
* aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
* If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
* some debug features.
*/
#define ARCH_SLAB_MINALIGN 0
#endif
/*
* struct kmem_cache
*
......
#ifndef __LINUX_SLOB_DEF_H
#define __LINUX_SLOB_DEF_H
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
......
......@@ -116,6 +116,14 @@ struct kmem_cache {
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
/*
* Maximum kmalloc object size handled by SLUB. Larger object allocations
* are passed through to the page allocator. The page allocator "fastpath"
......
......@@ -115,6 +115,7 @@
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
......@@ -144,30 +145,6 @@
#define BYTES_PER_WORD sizeof(void *)
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
#ifndef ARCH_KMALLOC_MINALIGN
/*
* Enforce a minimum alignment for the kmalloc caches.
* Usually, the kmalloc caches are cache_line_size() aligned, except when
* DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
* Some archs want to perform DMA into kmalloc caches and need a guaranteed
* alignment larger than the alignment of a 64-bit integer.
* ARCH_KMALLOC_MINALIGN allows that.
* Note that increasing this value may disable some debug features.
*/
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
/*
* Enforce a minimum alignment for all caches.
* Intended for archs that get misalignment faults even for BYTES_PER_WORD
* aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
* If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
* some debug features.
*/
#define ARCH_SLAB_MINALIGN 0
#endif
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
......@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
}
#endif
/*
* Allocates and initializes nodelists for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_list3
* will be allocated off-node since memory is not yet online for the new node.
* When hotplugging memory or a cpu, existing nodelists are not replaced if
* already in use.
*
* Must hold cache_chain_mutex.
*/
static int init_cache_nodelists_node(int node)
{
struct kmem_cache *cachep;
struct kmem_list3 *l3;
const int memsize = sizeof(struct kmem_list3);
list_for_each_entry(cachep, &cache_chain, next) {
/*
* Set up the size64 kmemlist for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
if (!cachep->nodelists[node]) {
l3 = kmalloc_node(memsize, GFP_KERNEL, node);
if (!l3)
return -ENOMEM;
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
/*
* The l3s don't come and go as CPUs come and
* go. cache_chain_mutex is sufficient
* protection here.
*/
cachep->nodelists[node] = l3;
}
spin_lock_irq(&cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock);
}
return 0;
}
static void __cpuinit cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
......@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu)
struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu);
const int memsize = sizeof(struct kmem_list3);
int err;
/*
* We need to do this right in the beginning since
......@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu)
* kmalloc_node allows us to add the slab to the right
* kmem_list3 and not this cpu's kmem_list3
*/
list_for_each_entry(cachep, &cache_chain, next) {
/*
* Set up the size64 kmemlist for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
if (!cachep->nodelists[node]) {
l3 = kmalloc_node(memsize, GFP_KERNEL, node);
if (!l3)
goto bad;
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
/*
* The l3s don't come and go as CPUs come and
* go. cache_chain_mutex is sufficient
* protection here.
*/
cachep->nodelists[node] = l3;
}
spin_lock_irq(&cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock);
}
err = init_cache_nodelists_node(node);
if (err < 0)
goto bad;
/*
* Now we can go ahead with allocating the shared arrays and
......@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
&cpuup_callback, NULL, 0
};
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
* Drains freelist for a node on each slab cache, used for memory hot-remove.
* Returns -EBUSY if all objects cannot be drained so that the node is not
* removed.
*
* Must hold cache_chain_mutex.
*/
static int __meminit drain_cache_nodelists_node(int node)
{
struct kmem_cache *cachep;
int ret = 0;
list_for_each_entry(cachep, &cache_chain, next) {
struct kmem_list3 *l3;
l3 = cachep->nodelists[node];
if (!l3)
continue;
drain_freelist(cachep, l3, l3->free_objects);
if (!list_empty(&l3->slabs_full) ||
!list_empty(&l3->slabs_partial)) {
ret = -EBUSY;
break;
}
}
return ret;
}
static int __meminit slab_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mnb = arg;
int ret = 0;
int nid;
nid = mnb->status_change_nid;
if (nid < 0)
goto out;
switch (action) {
case MEM_GOING_ONLINE:
mutex_lock(&cache_chain_mutex);
ret = init_cache_nodelists_node(nid);
mutex_unlock(&cache_chain_mutex);
break;
case MEM_GOING_OFFLINE:
mutex_lock(&cache_chain_mutex);
ret = drain_cache_nodelists_node(nid);
mutex_unlock(&cache_chain_mutex);
break;
case MEM_ONLINE:
case MEM_OFFLINE:
case MEM_CANCEL_ONLINE:
case MEM_CANCEL_OFFLINE:
break;
}
out:
return ret ? notifier_from_errno(ret) : NOTIFY_OK;
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
/*
* swap the static kmem_list3 with kmalloced memory
*/
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
int nodeid)
static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
int nodeid)
{
struct kmem_list3 *ptr;
......@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void)
*/
register_cpu_notifier(&cpucache_notifier);
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
* nodelists.
*/
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
/*
* The reap timers are started later, with a module init call: That part
* of the kernel is not yet operational.
......@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (ralign < align) {
ralign = align;
}
/* disable debug if necessary */
if (ralign > __alignof__(unsigned long long))
/* disable debug if not aligning with REDZONE_ALIGN */
if (ralign & (__alignof__(unsigned long long) - 1))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/*
* 4) Store it.
......@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */
cachep->obj_offset += sizeof(unsigned long long);
size += 2 * sizeof(unsigned long long);
cachep->obj_offset += align;
size += align + sizeof(unsigned long long);
}
if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of
......@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p)
unsigned long node_frees = cachep->node_frees;
unsigned long overflows = cachep->node_overflow;
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
%4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
reaped, errors, max_freeable, node_allocs,
node_frees, overflows);
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
"%4lu %4lu %4lu %4lu %4lu",
allocs, high, grown,
reaped, errors, max_freeable, node_allocs,
node_frees, overflows);
}
/* cpu stats */
{
......
......@@ -467,14 +467,6 @@ static void slob_free(void *block, int size)
* End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
*/
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
#endif
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
unsigned int *m;
......
......@@ -157,14 +157,6 @@
#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
SLAB_CACHE_DMA | SLAB_NOTRACK)
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
......@@ -1084,7 +1076,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
if (node == -1)
return alloc_pages(flags, order);
else
return alloc_pages_node(node, flags, order);
return alloc_pages_exact_node(node, flags, order);
}
static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
......@@ -2429,9 +2421,11 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
#ifdef CONFIG_SLUB_DEBUG
void *addr = page_address(page);
void *p;
DECLARE_BITMAP(map, page->objects);
long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
GFP_ATOMIC);
bitmap_zero(map, page->objects);
if (!map)
return;
slab_err(s, page, "%s", text);
slab_lock(page);
for_each_free_object(p, s, page->freelist)
......@@ -2446,6 +2440,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
}
}
slab_unlock(page);
kfree(map);
#endif
}
......@@ -3338,8 +3333,15 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
struct kmem_cache *s;
void *ret;
if (unlikely(size > SLUB_MAX_SIZE))
return kmalloc_large_node(size, gfpflags, node);
if (unlikely(size > SLUB_MAX_SIZE)) {
ret = kmalloc_large_node(size, gfpflags, node);
trace_kmalloc_node(caller, ret,
size, PAGE_SIZE << get_order(size),
gfpflags, node);
return ret;
}
s = get_slab(size, gfpflags);
......@@ -3651,10 +3653,10 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
struct page *page, enum track_item alloc)
struct page *page, enum track_item alloc,
long *map)
{
void *addr = page_address(page);
DECLARE_BITMAP(map, page->objects);
void *p;
bitmap_zero(map, page->objects);
......@@ -3673,11 +3675,14 @@ static int list_locations(struct kmem_cache *s, char *buf,
unsigned long i;
struct loc_track t = { 0, 0, NULL };
int node;
unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
sizeof(unsigned long), GFP_KERNEL);
if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY))
if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
GFP_TEMPORARY)) {
kfree(map);
return sprintf(buf, "Out of memory\n");
}
/* Push back cpu slabs */
flush_all(s);
......@@ -3691,9 +3696,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
spin_lock_irqsave(&n->list_lock, flags);
list_for_each_entry(page, &n->partial, lru)
process_slab(&t, s, page, alloc);
process_slab(&t, s, page, alloc, map);
list_for_each_entry(page, &n->full, lru)
process_slab(&t, s, page, alloc);
process_slab(&t, s, page, alloc, map);
spin_unlock_irqrestore(&n->list_lock, flags);
}
......@@ -3744,6 +3749,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
}
free_loc_track(&t);
kfree(map);
if (!t.count)
len += sprintf(buf, "No data\n");
return len;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册