提交 02b709df 编写于 作者: N Nick Piggin 提交者: Linus Torvalds

mm: purge fragmented percpu vmap blocks

Improve handling of fragmented per-CPU vmaps.  Previously we did not free
a per-CPU map until all of its addresses had been used and freed, so
fragmented blocks could fill up vmalloc space even if they actually had
no active vmap regions within them.

Add some logic to allow all CPUs to have these blocks purged in the case
of failure to allocate a new vm area, and also put some logic to trim
such blocks of a current CPU if we hit them in the allocation path (so
as to avoid a large build up of them).

Christoph reported some vmap allocation failures when using the per CPU
vmap APIs in XFS, which cannot be reproduced after this patch and the
previous bug fix.

Cc: linux-mm@kvack.org
Cc: stable@kernel.org
Tested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>
--
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 de560423
...@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void) ...@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void)
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
/*
 * Forward declaration for the per-CPU vmap block code: the function is
 * defined further down, but __purge_vmap_area_lazy() needs to call it.
 */
static void purge_fragmented_blocks_allcpus(void);
/* /*
* Purges all lazily-freed vmap areas. * Purges all lazily-freed vmap areas.
* *
...@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, ...@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
} else } else
spin_lock(&purge_lock); spin_lock(&purge_lock);
if (sync)
purge_fragmented_blocks_allcpus();
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) { list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) { if (va->flags & VM_LAZY_FREE) {
...@@ -678,6 +684,7 @@ struct vmap_block { ...@@ -678,6 +684,7 @@ struct vmap_block {
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
struct list_head free_list; struct list_head free_list;
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct list_head purge;
}; };
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
...@@ -782,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb) ...@@ -782,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb)
call_rcu(&vb->rcu_head, rcu_free_vb); call_rcu(&vb->rcu_head, rcu_free_vb);
} }
/*
 * Purge the fragmented vmap blocks sitting on @cpu's vmap_block_queue
 * free list.
 *
 * A block qualifies when free + dirty accounts for all VMAP_BBMAP_BITS
 * (i.e. it has no outstanding allocations) but it is not yet entirely
 * dirty.  Each qualifying block is marked fully allocated and fully dirty,
 * unlinked from the free list, and finally handed to free_vmap_block().
 */
static void purge_fragmented_blocks(int cpu)
{
	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
	struct vmap_block *vb, *tmp;
	LIST_HEAD(purge);

	rcu_read_lock();
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
		/* Unlocked peek: skip blocks that do not look purgeable. */
		if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
		    vb->dirty == VMAP_BBMAP_BITS)
			continue;

		spin_lock(&vb->lock);

		/* Repeat the test now that vb->lock is held. */
		if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
		    vb->dirty == VMAP_BBMAP_BITS) {
			spin_unlock(&vb->lock);
			continue;
		}

		/* prevent further allocs after releasing lock */
		vb->free = 0;
		/* prevent purging it again */
		vb->dirty = VMAP_BBMAP_BITS;
		bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
		bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);

		/* The queue lock nests inside the block lock here. */
		spin_lock(&vbq->lock);
		list_del_rcu(&vb->free_list);
		spin_unlock(&vbq->lock);
		spin_unlock(&vb->lock);

		/* Park the block on the local list; it is freed below. */
		list_add_tail(&vb->purge, &purge);
	}
	rcu_read_unlock();

	/* Release everything collected, after leaving the RCU read section. */
	list_for_each_entry_safe(vb, tmp, &purge, purge) {
		list_del(&vb->purge);
		free_vmap_block(vb);
	}
}
/*
 * Purge the fragmented vmap blocks of the CPU identified by
 * smp_processor_id().
 */
static void purge_fragmented_blocks_thiscpu(void)
{
	int this_cpu = smp_processor_id();

	purge_fragmented_blocks(this_cpu);
}
static void purge_fragmented_blocks_allcpus(void)
{
int cpu;
for_each_possible_cpu(cpu)
purge_fragmented_blocks(cpu);
}
static void *vb_alloc(unsigned long size, gfp_t gfp_mask) static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{ {
struct vmap_block_queue *vbq; struct vmap_block_queue *vbq;
struct vmap_block *vb; struct vmap_block *vb;
unsigned long addr = 0; unsigned long addr = 0;
unsigned int order; unsigned int order;
int purge = 0;
BUG_ON(size & ~PAGE_MASK); BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
...@@ -800,10 +856,20 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) ...@@ -800,10 +856,20 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
int i; int i;
spin_lock(&vb->lock); spin_lock(&vb->lock);
if (vb->free < 1UL << order)
goto next;
i = bitmap_find_free_region(vb->alloc_map, i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order); VMAP_BBMAP_BITS, order);
if (i >= 0) { if (i < 0) {
if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
/* fragmented and no outstanding allocations */
BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
purge = 1;
}
goto next;
}
addr = vb->va->va_start + (i << PAGE_SHIFT); addr = vb->va->va_start + (i << PAGE_SHIFT);
BUG_ON(addr_to_vb_idx(addr) != BUG_ON(addr_to_vb_idx(addr) !=
addr_to_vb_idx(vb->va->va_start)); addr_to_vb_idx(vb->va->va_start));
...@@ -815,9 +881,13 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) ...@@ -815,9 +881,13 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
} }
spin_unlock(&vb->lock); spin_unlock(&vb->lock);
break; break;
} next:
spin_unlock(&vb->lock); spin_unlock(&vb->lock);
} }
if (purge)
purge_fragmented_blocks_thiscpu();
put_cpu_var(vmap_block_queue); put_cpu_var(vmap_block_queue);
rcu_read_unlock(); rcu_read_unlock();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册