diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92715b10028fde03ead671e6c3df56..2037a861e3679910152a98ba98a667b395c6773c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; diff --git a/mm/slab.c b/mm/slab.c index e0819fa96559f3d166a35001dbefd4982ffd2a08..4765c97ce6900d98b8ce2968cf9ff62a176f6e42 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3419,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slab.h b/mm/slab.h index 27492eb678f722400147e22da62a68de893497e8..7b608719799763cb075c35e3fd4c15e0fddc18b5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer, * may be allocated or freed using these operations. */ void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_MEMCG_KMEM /* diff --git a/mm/slab_common.c b/mm/slab_common.c index d88e97c10a2e31669753681efc74f6bf50fdf2ef..3c6a86b4ec25f8462c1584dcb5bcf01e4edbd4ff 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p) kmem_cache_free(s, p[i]); } -bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void **p) { size_t i; @@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr, void *x = p[i] = kmem_cache_alloc(s, flags); if (!x) { __kmem_cache_free_bulk(s, i, p); - return false; + return 0; } } - return true; + return i; } #ifdef CONFIG_MEMCG_KMEM diff --git a/mm/slob.c b/mm/slob.c index 0d7e5df74d1f03e7069d1d938452b9474bc9a969..17e8f8cc7c534adca165f6d0e9b546e2f8484148 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) } EXPORT_SYMBOL(kmem_cache_free_bulk); -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { return __kmem_cache_alloc_bulk(s, flags, size, p); diff --git a/mm/slub.c b/mm/slub.c index a0c1365f64269f1ba117614180f6e34c1f081f3a..46997517406ede0e987388763c9fa1d07875c1db 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1065,11 +1065,15 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, return 0; } +/* Supports checking bulk free of a constructed freelist */ static noinline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + void *object = head; + int cnt = 0; spin_lock_irqsave(&n->list_lock, *flags); slab_lock(page); @@ -1077,6 +1081,9 @@ static noinline struct kmem_cache_node *free_debug_processing( if (!check_slab(s, page)) goto fail; +next_object: + cnt++; + if (!check_valid_pointer(s, page, object)) { slab_err(s, page, "Invalid object pointer 0x%p", object); goto fail; @@ -1107,8 +1114,19 @@ static noinline struct kmem_cache_node *free_debug_processing( if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); trace(s, page, object, 0); + /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ init_object(s, object, SLUB_RED_INACTIVE); + + /* Reached end of constructed freelist yet? */ + if (object != tail) { + object = get_freepointer(s, object); + goto next_object; + } out: + if (cnt != bulk_cnt) + slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", + bulk_cnt, cnt); + slab_unlock(page); /* * Keep node_lock to preserve integrity @@ -1212,7 +1230,8 @@ static inline int alloc_debug_processing(struct kmem_cache *s, struct page *page, void *object, unsigned long addr) { return 0; } static inline struct kmem_cache_node *free_debug_processing( - struct kmem_cache *s, struct page *page, void *object, + struct kmem_cache *s, struct page *page, + void *head, void *tail, int bulk_cnt, unsigned long addr, unsigned long *flags) { return NULL; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) @@ -1273,14 +1292,21 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, return memcg_kmem_get_cache(s, flags); } -static inline void slab_post_alloc_hook(struct kmem_cache *s, - gfp_t flags, void *object) +static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, + size_t size, void **p) { + size_t i; + flags &= gfp_allowed_mask; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); + for (i = 0; i < size; i++) { + void *object = p[i]; + + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); + kmemleak_alloc_recursive(object, s->object_size, 1, + s->flags, flags); + kasan_slab_alloc(s, object); + } memcg_kmem_put_cache(s); - kasan_slab_alloc(s, object); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -1308,6 +1334,29 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) kasan_slab_free(s, x); } +static inline void slab_free_freelist_hook(struct kmem_cache *s, + void *head, void *tail) +{ +/* + * Compiler cannot detect this function can be removed if slab_free_hook() + * evaluates to nothing. Thus, catch all relevant config debug options here. + */ +#if defined(CONFIG_KMEMCHECK) || \ + defined(CONFIG_LOCKDEP) || \ + defined(CONFIG_DEBUG_KMEMLEAK) || \ + defined(CONFIG_DEBUG_OBJECTS_FREE) || \ + defined(CONFIG_KASAN) + + void *object = head; + void *tail_obj = tail ? : head; + + do { + slab_free_hook(s, object); + } while ((object != tail_obj) && + (object = get_freepointer(s, object))); +#endif +} + static void setup_object(struct kmem_cache *s, struct page *page, void *object) { @@ -2433,7 +2482,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, static __always_inline void *slab_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr) { - void **object; + void *object; struct kmem_cache_cpu *c; struct page *page; unsigned long tid; @@ -2512,7 +2561,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->object_size); - slab_post_alloc_hook(s, gfpflags, object); + slab_post_alloc_hook(s, gfpflags, 1, &object); return object; } @@ -2583,10 +2632,11 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr) + void *head, void *tail, int cnt, + unsigned long addr) + { void *prior; - void **object = (void *)x; int was_frozen; struct page new; unsigned long counters; @@ -2596,7 +2646,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, stat(s, FREE_SLOWPATH); if (kmem_cache_debug(s) && - !(n = free_debug_processing(s, page, x, addr, &flags))) + !(n = free_debug_processing(s, page, head, tail, cnt, + addr, &flags))) return; do { @@ -2606,10 +2657,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } prior = page->freelist; counters = page->counters; - set_freepointer(s, object, prior); + set_freepointer(s, tail, prior); new.counters = counters; was_frozen = new.frozen; - new.inuse--; + new.inuse -= cnt; if ((!new.inuse || !prior) && !was_frozen) { if (kmem_cache_has_cpu_partial(s) && !prior) { @@ -2640,7 +2691,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } while (!cmpxchg_double_slab(s, page, prior, counters, - object, new.counters, + head, new.counters, "__slab_free")); if (likely(!n)) { @@ -2705,15 +2756,20 @@ static void __slab_free(struct kmem_cache *s, struct page *page, * * If fastpath is not possible then fall back to __slab_free where we deal * with all sorts of special processing. + * + * Bulk free of a freelist with several objects (all pointing to the + * same page) possible by specifying head and tail ptr, plus objects + * count (cnt). Bulk free indicated by tail pointer being set. */ -static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, unsigned long addr) +static __always_inline void slab_free(struct kmem_cache *s, struct page *page, + void *head, void *tail, int cnt, + unsigned long addr) { - void **object = (void *)x; + void *tail_obj = tail ? : head; struct kmem_cache_cpu *c; unsigned long tid; - slab_free_hook(s, x); + slab_free_freelist_hook(s, head, tail); redo: /* @@ -2732,19 +2788,19 @@ static __always_inline void slab_free(struct kmem_cache *s, barrier(); if (likely(page == c->page)) { - set_freepointer(s, object, c->freelist); + set_freepointer(s, tail_obj, c->freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, - object, next_tid(tid)))) { + head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); goto redo; } stat(s, FREE_FASTPATH); } else - __slab_free(s, page, x, addr); + __slab_free(s, page, head, tail_obj, cnt, addr); } @@ -2753,59 +2809,116 @@ void kmem_cache_free(struct kmem_cache *s, void *x) s = cache_from_obj(s, x); if (!s) return; - slab_free(s, virt_to_head_page(x), x, _RET_IP_); + slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); trace_kmem_cache_free(_RET_IP_, x); } EXPORT_SYMBOL(kmem_cache_free); -/* Note that interrupts must be enabled when calling this function. */ -void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) -{ - struct kmem_cache_cpu *c; +struct detached_freelist { struct page *page; - int i; + void *tail; + void *freelist; + int cnt; +}; - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); +/* + * This function progressively scans the array with free objects (with + * a limited look ahead) and extract objects belonging to the same + * page. It builds a detached freelist directly within the given + * page/objects. This can happen without any need for + * synchronization, because the objects are owned by running process. + * The freelist is build up as a single linked list in the objects. + * The idea is, that this detached freelist can then be bulk + * transferred to the real freelist(s), but only requiring a single + * synchronization primitive. Look ahead in the array is limited due + * to performance reasons. + */ +static int build_detached_freelist(struct kmem_cache *s, size_t size, + void **p, struct detached_freelist *df) +{ + size_t first_skipped_index = 0; + int lookahead = 3; + void *object; - for (i = 0; i < size; i++) { - void *object = p[i]; + /* Always re-init detached_freelist */ + df->page = NULL; - BUG_ON(!object); - /* kmem cache debug support */ - s = cache_from_obj(s, object); - if (unlikely(!s)) - goto exit; - slab_free_hook(s, object); + do { + object = p[--size]; + } while (!object && size); - page = virt_to_head_page(object); + if (!object) + return 0; - if (c->page == page) { - /* Fastpath: local CPU free */ - set_freepointer(s, object, c->freelist); - c->freelist = object; - } else { - c->tid = next_tid(c->tid); - local_irq_enable(); - /* Slowpath: overhead locked cmpxchg_double_slab */ - __slab_free(s, page, object, _RET_IP_); - local_irq_disable(); - c = this_cpu_ptr(s->cpu_slab); + /* Start new detached freelist */ + set_freepointer(s, object, NULL); + df->page = virt_to_head_page(object); + df->tail = object; + df->freelist = object; + p[size] = NULL; /* mark object processed */ + df->cnt = 1; + + while (size) { + object = p[--size]; + if (!object) + continue; /* Skip processed objects */ + + /* df->page is always set at this point */ + if (df->page == virt_to_head_page(object)) { + /* Opportunity build freelist */ + set_freepointer(s, object, df->freelist); + df->freelist = object; + df->cnt++; + p[size] = NULL; /* mark object processed */ + + continue; } + + /* Limit look ahead search */ + if (!--lookahead) + break; + + if (!first_skipped_index) + first_skipped_index = size + 1; } -exit: - c->tid = next_tid(c->tid); - local_irq_enable(); + + return first_skipped_index; +} + + +/* Note that interrupts must be enabled when calling this function. */ +void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +{ + if (WARN_ON(!size)) + return; + + do { + struct detached_freelist df; + struct kmem_cache *s; + + /* Support for memcg */ + s = cache_from_obj(orig_s, p[size - 1]); + + size = build_detached_freelist(s, size, p, &df); + if (unlikely(!df.page)) + continue; + + slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); /* Note that interrupts must be enabled when calling this function. */ -bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { struct kmem_cache_cpu *c; int i; + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, flags); + if (unlikely(!s)) + return false; /* * Drain objects in the per cpu slab, while disabling local * IRQs, which protects against PREEMPT and interrupts @@ -2830,17 +2943,8 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, c = this_cpu_ptr(s->cpu_slab); continue; /* goto for-loop */ } - - /* kmem_cache debug support */ - s = slab_pre_alloc_hook(s, flags); - if (unlikely(!s)) - goto error; - c->freelist = get_freepointer(s, object); p[i] = object; - - /* kmem_cache debug support */ - slab_post_alloc_hook(s, flags, object); } c->tid = next_tid(c->tid); local_irq_enable(); @@ -2853,12 +2957,14 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, memset(p[j], 0, s->object_size); } - return true; - + /* memcg and kmem_cache debug support */ + slab_post_alloc_hook(s, flags, size, p); + return i; error: - __kmem_cache_free_bulk(s, i, p); local_irq_enable(); - return false; + slab_post_alloc_hook(s, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); @@ -3523,7 +3629,7 @@ void kfree(const void *x) __free_kmem_pages(page, compound_order(page)); return; } - slab_free(page->slab_cache, page, object, _RET_IP_); + slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kfree);