提交 a4c20b9a 编写于 作者: L Linus Torvalds

Merge branch 'for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

Pull percpu updates from Tejun Heo:
 "These are the percpu changes for the v4.13-rc1 merge window. There are
  a couple of visibility-related changes - tracepoints and allocator stats
  through debugfs - along with __ro_after_init markings and a cosmetic
  rename in percpu_counter.

  Please note that the simple O(#elements_in_the_chunk) area allocator
  used by percpu allocator is again showing scalability issues,
  primarily with bpf allocating and freeing large number of counters.
  Dennis is working on the replacement allocator and the percpu
  allocator will be seeing increased churn in the coming cycles"

* 'for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: fix static checker warnings in pcpu_destroy_chunk
  percpu: fix early calls for spinlock in pcpu_stats
  percpu: resolve err may not be initialized in pcpu_alloc
  percpu_counter: Rename __percpu_counter_add to percpu_counter_add_batch
  percpu: add tracepoint support for percpu memory
  percpu: expose statistics about percpu memory via debugfs
  percpu: migrate percpu data structures to internal header
  percpu: add missing lockdep_assert_held to func pcpu_free_area
  mark most percpu globals as __ro_after_init
...@@ -1256,7 +1256,7 @@ void clean_tree_block(struct btrfs_fs_info *fs_info, ...@@ -1256,7 +1256,7 @@ void clean_tree_block(struct btrfs_fs_info *fs_info,
btrfs_assert_tree_locked(buf); btrfs_assert_tree_locked(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
__percpu_counter_add(&fs_info->dirty_metadata_bytes, percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
-buf->len, -buf->len,
fs_info->dirty_metadata_batch); fs_info->dirty_metadata_batch);
/* ugh, clear_extent_buffer_dirty needs to lock the page */ /* ugh, clear_extent_buffer_dirty needs to lock the page */
...@@ -4047,7 +4047,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) ...@@ -4047,7 +4047,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->start, transid, fs_info->generation); buf->start, transid, fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf); was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) if (!was_dirty)
__percpu_counter_add(&fs_info->dirty_metadata_bytes, percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
buf->len, buf->len,
fs_info->dirty_metadata_batch); fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
......
...@@ -3577,7 +3577,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, ...@@ -3577,7 +3577,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb,
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock); spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
__percpu_counter_add(&fs_info->dirty_metadata_bytes, percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
-eb->len, -eb->len,
fs_info->dirty_metadata_batch); fs_info->dirty_metadata_batch);
ret = 1; ret = 1;
......
...@@ -1763,7 +1763,7 @@ static void btrfs_set_bit_hook(void *private_data, ...@@ -1763,7 +1763,7 @@ static void btrfs_set_bit_hook(void *private_data,
if (btrfs_is_testing(fs_info)) if (btrfs_is_testing(fs_info))
return; return;
__percpu_counter_add(&fs_info->delalloc_bytes, len, percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
fs_info->delalloc_batch); fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len; BTRFS_I(inode)->delalloc_bytes += len;
...@@ -1838,7 +1838,7 @@ static void btrfs_clear_bit_hook(void *private_data, ...@@ -1838,7 +1838,7 @@ static void btrfs_clear_bit_hook(void *private_data,
&inode->vfs_inode, &inode->vfs_inode,
state->start, len); state->start, len);
__percpu_counter_add(&fs_info->delalloc_bytes, -len, percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
fs_info->delalloc_batch); fs_info->delalloc_batch);
spin_lock(&inode->lock); spin_lock(&inode->lock);
inode->delalloc_bytes -= len; inode->delalloc_bytes -= len;
......
...@@ -1211,7 +1211,7 @@ xfs_mod_icount( ...@@ -1211,7 +1211,7 @@ xfs_mod_icount(
struct xfs_mount *mp, struct xfs_mount *mp,
int64_t delta) int64_t delta)
{ {
__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
ASSERT(0); ASSERT(0);
percpu_counter_add(&mp->m_icount, -delta); percpu_counter_add(&mp->m_icount, -delta);
...@@ -1290,7 +1290,7 @@ xfs_mod_fdblocks( ...@@ -1290,7 +1290,7 @@ xfs_mod_fdblocks(
else else
batch = XFS_FDBLOCKS_BATCH; batch = XFS_FDBLOCKS_BATCH;
__percpu_counter_add(&mp->m_fdblocks, delta, batch); percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside, if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
XFS_FDBLOCKS_BATCH) >= 0) { XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */ /* we had space! */
......
...@@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi) ...@@ -66,7 +66,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
static inline void __add_wb_stat(struct bdi_writeback *wb, static inline void __add_wb_stat(struct bdi_writeback *wb,
enum wb_stat_item item, s64 amount) enum wb_stat_item item, s64 amount)
{ {
__percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH); percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
} }
static inline void __inc_wb_stat(struct bdi_writeback *wb, static inline void __inc_wb_stat(struct bdi_writeback *wb,
......
...@@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat) ...@@ -518,7 +518,7 @@ static inline void blkg_stat_exit(struct blkg_stat *stat)
*/ */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{ {
__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
} }
/** /**
...@@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, ...@@ -597,14 +597,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
else else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
if (op_is_sync(op)) if (op_is_sync(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
else else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
} }
/** /**
......
...@@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void); ...@@ -22,7 +22,7 @@ unsigned long vm_memory_committed(void);
static inline void vm_acct_memory(long pages) static inline void vm_acct_memory(long pages)
{ {
__percpu_counter_add(&vm_committed_as, pages, vm_committed_as_batch); percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch);
} }
static inline void vm_unacct_memory(long pages) static inline void vm_unacct_memory(long pages)
......
...@@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, ...@@ -39,7 +39,8 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc); s64 __percpu_counter_sum(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
...@@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) ...@@ -50,7 +51,7 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{ {
__percpu_counter_add(fbc, amount, percpu_counter_batch); percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
} }
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
...@@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) ...@@ -136,7 +137,7 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
} }
static inline void static inline void
__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{ {
percpu_counter_add(fbc, amount); percpu_counter_add(fbc, amount);
} }
......
...@@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf) ...@@ -154,12 +154,12 @@ static inline int frag_mem_limit(struct netns_frags *nf)
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{ {
__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
} }
static inline void add_frag_mem_limit(struct netns_frags *nf, int i) static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{ {
__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
} }
static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
......
#undef TRACE_SYSTEM
#define TRACE_SYSTEM percpu
#if !defined(_TRACE_PERCPU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PERCPU_H
#include <linux/tracepoint.h>
/*
 * percpu_alloc_percpu - trace event describing one percpu allocation
 *
 * Records the reserved and is_atomic flags, the requested size and
 * alignment, a base address, an offset and the resulting __percpu pointer,
 * and prints all of them.
 * NOTE(review): the call site is not part of this header; presumably this is
 * emitted when an allocation succeeds - confirm against the allocator.
 */
TRACE_EVENT(percpu_alloc_percpu,
TP_PROTO(bool reserved, bool is_atomic, size_t size,
size_t align, void *base_addr, int off, void __percpu *ptr),
TP_ARGS(reserved, is_atomic, size, align, base_addr, off, ptr),
TP_STRUCT__entry(
__field( bool, reserved )
__field( bool, is_atomic )
__field( size_t, size )
__field( size_t, align )
__field( void *, base_addr )
__field( int, off )
__field( void __percpu *, ptr )
),
TP_fast_assign(
__entry->reserved = reserved;
__entry->is_atomic = is_atomic;
__entry->size = size;
__entry->align = align;
__entry->base_addr = base_addr;
__entry->off = off;
__entry->ptr = ptr;
),
TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu base_addr=%p off=%d ptr=%p",
__entry->reserved, __entry->is_atomic,
__entry->size, __entry->align,
__entry->base_addr, __entry->off, __entry->ptr)
);
/*
 * percpu_free_percpu - trace event describing the release of a percpu area
 *
 * Records and prints a base address, an offset and the __percpu pointer.
 * NOTE(review): the call site is not visible here; presumably emitted from
 * the percpu free path - confirm against the allocator.
 */
TRACE_EVENT(percpu_free_percpu,
TP_PROTO(void *base_addr, int off, void __percpu *ptr),
TP_ARGS(base_addr, off, ptr),
TP_STRUCT__entry(
__field( void *, base_addr )
__field( int, off )
__field( void __percpu *, ptr )
),
TP_fast_assign(
__entry->base_addr = base_addr;
__entry->off = off;
__entry->ptr = ptr;
),
TP_printk("base_addr=%p off=%d ptr=%p",
__entry->base_addr, __entry->off, __entry->ptr)
);
/*
 * percpu_alloc_percpu_fail - trace event for an allocation request that
 * did not produce an area
 *
 * Carries only the request parameters (reserved, is_atomic, size, align);
 * there is no address or pointer to report.
 * NOTE(review): the call site is not visible here; presumably emitted from
 * the allocator's failure path - confirm.
 */
TRACE_EVENT(percpu_alloc_percpu_fail,
TP_PROTO(bool reserved, bool is_atomic, size_t size, size_t align),
TP_ARGS(reserved, is_atomic, size, align),
TP_STRUCT__entry(
__field( bool, reserved )
__field( bool, is_atomic )
__field( size_t, size )
__field( size_t, align )
),
TP_fast_assign(
__entry->reserved = reserved;
__entry->is_atomic = is_atomic;
__entry->size = size;
__entry->align = align;
),
TP_printk("reserved=%d is_atomic=%d size=%zu align=%zu",
__entry->reserved, __entry->is_atomic,
__entry->size, __entry->align)
);
/*
 * percpu_create_chunk - trace event for the creation of a chunk
 *
 * Records and prints a single base address; pcpu_create_chunk() passes the
 * new chunk's base_addr.
 */
TRACE_EVENT(percpu_create_chunk,
TP_PROTO(void *base_addr),
TP_ARGS(base_addr),
TP_STRUCT__entry(
__field( void *, base_addr )
),
TP_fast_assign(
__entry->base_addr = base_addr;
),
TP_printk("base_addr=%p", __entry->base_addr)
);
/*
 * percpu_destroy_chunk - trace event for the destruction of a chunk
 *
 * Records and prints a single base address; pcpu_destroy_chunk() passes the
 * chunk's base_addr before the chunk is freed.
 */
TRACE_EVENT(percpu_destroy_chunk,
TP_PROTO(void *base_addr),
TP_ARGS(base_addr),
TP_STRUCT__entry(
__field( void *, base_addr )
),
TP_fast_assign(
__entry->base_addr = base_addr;
),
TP_printk("base_addr=%p", __entry->base_addr)
);
#endif /* _TRACE_PERCPU_H */
#include <trace/define_trace.h>
...@@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, ...@@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
if (val < (nr_cpu_ids * PROP_BATCH)) if (val < (nr_cpu_ids * PROP_BATCH))
val = percpu_counter_sum(&pl->events); val = percpu_counter_sum(&pl->events);
__percpu_counter_add(&pl->events, percpu_counter_add_batch(&pl->events,
-val + (val >> (period-pl->period)), PROP_BATCH); -val + (val >> (period-pl->period)), PROP_BATCH);
} else } else
percpu_counter_set(&pl->events, 0); percpu_counter_set(&pl->events, 0);
...@@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, ...@@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
{ {
fprop_reflect_period_percpu(p, pl); fprop_reflect_period_percpu(p, pl);
__percpu_counter_add(&pl->events, 1, PROP_BATCH); percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
percpu_counter_add(&p->events, 1); percpu_counter_add(&p->events, 1);
} }
...@@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p, ...@@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p,
return; return;
} else } else
fprop_reflect_period_percpu(p, pl); fprop_reflect_period_percpu(p, pl);
__percpu_counter_add(&pl->events, 1, PROP_BATCH); percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
percpu_counter_add(&p->events, 1); percpu_counter_add(&p->events, 1);
} }
...@@ -72,7 +72,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) ...@@ -72,7 +72,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
} }
EXPORT_SYMBOL(percpu_counter_set); EXPORT_SYMBOL(percpu_counter_set);
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{ {
s64 count; s64 count;
...@@ -89,7 +89,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) ...@@ -89,7 +89,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
} }
preempt_enable(); preempt_enable();
} }
EXPORT_SYMBOL(__percpu_counter_add); EXPORT_SYMBOL(percpu_counter_add_batch);
/* /*
* Add up all the per-cpu counts, return the result. This is a more accurate * Add up all the per-cpu counts, return the result. This is a more accurate
......
...@@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS ...@@ -706,3 +706,11 @@ config ARCH_USES_HIGH_VMA_FLAGS
bool bool
config ARCH_HAS_PKEYS config ARCH_HAS_PKEYS
bool bool
config PERCPU_STATS
bool "Collect percpu memory statistics"
default n
help
This feature collects and exposes statistics via debugfs. The
information includes global and per chunk statistics, which can
be used to help understand percpu memory usage.
...@@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o ...@@ -103,3 +103,4 @@ obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
#ifndef _MM_PERCPU_INTERNAL_H
#define _MM_PERCPU_INTERNAL_H
#include <linux/types.h>
#include <linux/percpu.h>
/*
 * struct pcpu_chunk - bookkeeping for one chunk of the percpu allocator
 *
 * The first two members exist only when CONFIG_PERCPU_STATS is enabled and
 * are maintained by pcpu_stats_area_alloc()/pcpu_stats_area_dealloc().
 * @populated is a flexible array member and therefore must stay last.
 */
struct pcpu_chunk {
#ifdef CONFIG_PERCPU_STATS
int nr_alloc; /* # of allocations currently accounted to this chunk */
size_t max_alloc_size; /* largest allocation size seen in this chunk */
#endif
struct list_head list; /* linked to pcpu_slot lists */
int free_size; /* free bytes in the chunk */
int contig_hint; /* max contiguous size hint */
void *base_addr; /* base address of this chunk */
int map_used; /* # of map entries used before the sentry */
int map_alloc; /* # of map entries allocated */
int *map; /* allocation map */
struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
void *data; /* chunk data */
int first_free; /* no free below this */
bool immutable; /* no [de]population allowed */
bool has_reserved; /*
 * Indicates if chunk has reserved space at
 * the beginning.  Reserved chunk will
 * contain reservation for static chunk.
 * Dynamic chunk will contain reservation
 * for static and reserved chunks.
 */
int nr_populated; /* # of populated pages */
unsigned long populated[]; /* populated bitmap */
};
extern spinlock_t pcpu_lock;
extern struct list_head *pcpu_slot;
extern int pcpu_nr_slots;
extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
#ifdef CONFIG_PERCPU_STATS
#include <linux/spinlock.h>
/*
 * struct percpu_stats - allocator-wide counters reported through debugfs
 *
 * Updated by the pcpu_stats_*() helpers below, which either assert that
 * pcpu_lock is held or take it themselves.
 */
struct percpu_stats {
u64 nr_alloc; /* lifetime # of allocations */
u64 nr_dealloc; /* lifetime # of deallocations */
u64 nr_cur_alloc; /* current # of allocations */
u64 nr_max_alloc; /* max # of live allocations */
u32 nr_chunks; /* current # of live chunks */
u32 nr_max_chunks; /* max # of live chunks */
size_t min_alloc_size; /* min allocation size */
size_t max_alloc_size; /* max allocation size */
};
extern struct percpu_stats pcpu_stats;
extern struct pcpu_alloc_info pcpu_stats_ai;
/*
 * pcpu_stats_save_ai - snapshot the allocation info for later reporting
 * @ai: allocation info to copy
 *
 * Debug aid only: just the fixed-size part of @ai is copied into
 * pcpu_stats_ai, the trailing flexible array is deliberately left out.
 * The smallest-allocation statistic is seeded with the unit size taken
 * from that copy.
 */
static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
{
	memcpy(&pcpu_stats_ai, ai, sizeof(pcpu_stats_ai));

	/* seed min_alloc_size with unit_size */
	pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size;
}
/*
 * pcpu_stats_area_alloc - account for a newly allocated area
 * @chunk: chunk the area was carved out of
 * @size: size of the allocated area in bytes
 *
 * Bumps the lifetime and live allocation counts, refreshes the high-water
 * mark of live allocations and the global smallest/largest allocation
 * sizes, then updates the per-chunk count and largest size.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
{
	lockdep_assert_held(&pcpu_lock);

	/* allocator-wide counters */
	pcpu_stats.nr_alloc += 1;
	pcpu_stats.nr_cur_alloc += 1;
	if (pcpu_stats.nr_cur_alloc > pcpu_stats.nr_max_alloc)
		pcpu_stats.nr_max_alloc = pcpu_stats.nr_cur_alloc;

	/* allocator-wide size extremes */
	if (size < pcpu_stats.min_alloc_size)
		pcpu_stats.min_alloc_size = size;
	if (size > pcpu_stats.max_alloc_size)
		pcpu_stats.max_alloc_size = size;

	/* per-chunk counters */
	chunk->nr_alloc += 1;
	if (size > chunk->max_alloc_size)
		chunk->max_alloc_size = size;
}
/*
 * pcpu_stats_area_dealloc - account for an area being released
 * @chunk: chunk the area belongs to
 *
 * Drops the per-chunk and global live allocation counts and bumps the
 * lifetime deallocation count.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
{
	lockdep_assert_held(&pcpu_lock);

	chunk->nr_alloc -= 1;

	pcpu_stats.nr_cur_alloc -= 1;
	pcpu_stats.nr_dealloc += 1;
}
/*
 * pcpu_stats_chunk_alloc - account for a newly created chunk
 *
 * Takes pcpu_lock itself (irqsave variant), increments the live chunk
 * count and raises the high-water mark when the new count exceeds it.
 */
static inline void pcpu_stats_chunk_alloc(void)
{
	unsigned long flags;

	spin_lock_irqsave(&pcpu_lock, flags);

	pcpu_stats.nr_chunks += 1;
	if (pcpu_stats.nr_chunks > pcpu_stats.nr_max_chunks)
		pcpu_stats.nr_max_chunks = pcpu_stats.nr_chunks;

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
/*
 * pcpu_stats_chunk_dealloc - account for a chunk being destroyed
 *
 * Takes pcpu_lock itself (irqsave variant) and decrements the live chunk
 * count; the high-water mark is left untouched.
 */
static inline void pcpu_stats_chunk_dealloc(void)
{
	unsigned long flags;

	spin_lock_irqsave(&pcpu_lock, flags);

	pcpu_stats.nr_chunks -= 1;

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
#else
/*
 * CONFIG_PERCPU_STATS is disabled: every statistics hook below is an empty
 * inline function with the same signature as its real counterpart, so
 * callers need no #ifdef of their own.
 */

/* No-op: allocation info is not saved without CONFIG_PERCPU_STATS. */
static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai)
{
}
/* No-op: area allocations are not counted without CONFIG_PERCPU_STATS. */
static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size)
{
}
/* No-op: area deallocations are not counted without CONFIG_PERCPU_STATS. */
static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk)
{
}
/* No-op: chunk creation is not counted without CONFIG_PERCPU_STATS. */
static inline void pcpu_stats_chunk_alloc(void)
{
}
/* No-op: chunk destruction is not counted without CONFIG_PERCPU_STATS. */
static inline void pcpu_stats_chunk_dealloc(void)
{
}
#endif /* !CONFIG_PERCPU_STATS */
#endif
...@@ -72,6 +72,9 @@ static struct pcpu_chunk *pcpu_create_chunk(void) ...@@ -72,6 +72,9 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
pcpu_chunk_populated(chunk, 0, nr_pages); pcpu_chunk_populated(chunk, 0, nr_pages);
spin_unlock_irq(&pcpu_lock); spin_unlock_irq(&pcpu_lock);
pcpu_stats_chunk_alloc();
trace_percpu_create_chunk(chunk->base_addr);
return chunk; return chunk;
} }
...@@ -79,7 +82,13 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) ...@@ -79,7 +82,13 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{ {
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
if (chunk && chunk->data) if (!chunk)
return;
pcpu_stats_chunk_dealloc();
trace_percpu_destroy_chunk(chunk->base_addr);
if (chunk->data)
__free_pages(chunk->data, order_base_2(nr_pages)); __free_pages(chunk->data, order_base_2(nr_pages));
pcpu_free_chunk(chunk); pcpu_free_chunk(chunk);
} }
......
/*
* mm/percpu-debug.c
*
* Copyright (C) 2017 Facebook Inc.
* Copyright (C) 2017 Dennis Zhou <dennisz@fb.com>
*
* This file is released under the GPLv2.
*
* Prints statistics about the percpu allocator and backing chunks.
*/
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/sort.h>
#include <linux/vmalloc.h>
#include "percpu-internal.h"
/*
 * P - emit one "name: value" statistics line
 * @X: label string
 * @Y: integer expression to print (converted to long long)
 *
 * Relies on a seq_file pointer named 'm' being in scope at the call site.
 * Both arguments are parenthesised so that the cast applies to the whole
 * of @Y rather than only to its first operand.
 */
#define P(X, Y) \
	seq_printf(m, " %-24s: %8lld\n", (X), (long long int)(Y))
/* Allocator-wide counters; printed by percpu_stats_show(). */
struct percpu_stats pcpu_stats;
/* Copy of the allocation info; its fields are printed by percpu_stats_show(). */
struct pcpu_alloc_info pcpu_stats_ai;
/*
 * cmpint - three-way comparison of two ints, for use with sort()
 * @a: pointer to the first int
 * @b: pointer to the second int
 *
 * Returns a negative value, zero or a positive value when *@a is less
 * than, equal to or greater than *@b.  The result is computed from two
 * comparisons rather than a subtraction so that it cannot overflow for
 * operands that are far apart (e.g. INT_MIN and 1).
 */
static int cmpint(const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	return (lhs > rhs) - (lhs < rhs);
}
/*
* Iterates over all chunks to find the max # of map entries used.
*/
static int find_max_map_used(void)
{
struct pcpu_chunk *chunk;
int slot, max_map_used;
max_map_used = 0;
for (slot = 0; slot < pcpu_nr_slots; slot++)
list_for_each_entry(chunk, &pcpu_slot[slot], list)
max_map_used = max(max_map_used, chunk->map_used);
return max_map_used;
}
/*
 * chunk_map_stats - print the statistics of a single chunk
 * @m: seq_file the report is written to
 * @chunk: chunk whose allocation map is examined
 * @buffer: scratch array of ints; one entry is written per map entry
 *	    between the first non-reserved entry and the last allocation
 *
 * Fragmentation is considered between the beginning of the chunk
 * (skipping the reserved entry when the chunk has one) and the last
 * allocation.  Area sizes are written to @buffer with free areas negated,
 * sorted, and then summarised: total and largest free fragment, and the
 * smallest, median and largest live allocation.
 *
 * The caller in this file invokes it with pcpu_lock held.
 */
static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
			    void *buffer)
{
	int i, s_index, last_alloc, alloc_sign, as_len;
	int *alloc_sizes, *p;
	/* statistics */
	int sum_frag = 0, max_frag = 0;
	int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0;

	alloc_sizes = buffer;
	s_index = chunk->has_reserved ? 1 : 0;

	/* find last allocation */
	last_alloc = -1;
	for (i = chunk->map_used - 1; i >= s_index; i--) {
		if (chunk->map[i] & 1) {
			last_alloc = i;
			break;
		}
	}

	/* if the chunk is not empty - ignoring reserve */
	if (last_alloc >= s_index) {
		as_len = last_alloc + 1 - s_index;

		/*
		 * Iterate through chunk map computing size info.
		 * The first bit is overloaded to be a used flag.
		 * negative = free space, positive = allocated
		 */
		for (i = 0, p = chunk->map + s_index; i < as_len; i++, p++) {
			alloc_sign = (*p & 1) ? 1 : -1;
			alloc_sizes[i] = alloc_sign *
				((p[1] & ~1) - (p[0] & ~1));
		}

		sort(alloc_sizes, as_len, sizeof(chunk->map[0]), cmpint, NULL);

		/*
		 * Iterate through the unallocated fragments, which sort to
		 * the front.  The bound is tested before *p is read so the
		 * scan can never look past the as_len entries filled above.
		 */
		for (i = 0, p = alloc_sizes; i < as_len && *p < 0; i++, p++) {
			sum_frag -= *p;
			max_frag = max(max_frag, -1 * (*p));
		}

		/*
		 * i now indexes the first allocated entry.  The entry at
		 * last_alloc is allocated, so one is expected to exist; the
		 * check keeps the reads in bounds regardless.
		 */
		if (i < as_len) {
			cur_min_alloc = alloc_sizes[i];
			cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2];
			cur_max_alloc = alloc_sizes[as_len - 1];
		}
	}

	P("nr_alloc", chunk->nr_alloc);
	P("max_alloc_size", chunk->max_alloc_size);
	P("free_size", chunk->free_size);
	P("contig_hint", chunk->contig_hint);
	P("sum_frag", sum_frag);
	P("max_frag", max_frag);
	P("cur_min_alloc", cur_min_alloc);
	P("cur_med_alloc", cur_med_alloc);
	P("cur_max_alloc", cur_max_alloc);
	seq_putc(m, '\n');
}
/*
 * percpu_stats_show - seq_file show callback producing the percpu report
 * @m: seq_file to print into
 * @v: not used by this function
 *
 * Prints the saved allocation info, the global counters and then one
 * block of statistics per chunk (the reserved chunk first, if there is
 * one, followed by every chunk on the slot lists).
 *
 * Returns 0 on success or -ENOMEM when the scratch buffer cannot be
 * allocated.
 */
static int percpu_stats_show(struct seq_file *m, void *v)
{
struct pcpu_chunk *chunk;
int slot, max_map_used;
void *buffer;
alloc_buffer:
/*
 * Size the scratch buffer from the largest map currently in use.  The
 * value is sampled under pcpu_lock, but the allocation itself is done
 * with the lock dropped.
 */
spin_lock_irq(&pcpu_lock);
max_map_used = find_max_map_used();
spin_unlock_irq(&pcpu_lock);
buffer = vmalloc(max_map_used * sizeof(pcpu_first_chunk->map[0]));
if (!buffer)
return -ENOMEM;
/* pcpu_lock is held from here until all chunks have been printed */
spin_lock_irq(&pcpu_lock);
/* if the buffer allocated earlier is too small */
if (max_map_used < find_max_map_used()) {
/* a map grew while the lock was dropped: free and start over */
spin_unlock_irq(&pcpu_lock);
vfree(buffer);
goto alloc_buffer;
}
/* PL(X): print field X of the saved allocation info, labelled "X" */
#define PL(X) \
seq_printf(m, " %-24s: %8lld\n", #X, (long long int)pcpu_stats_ai.X)
seq_printf(m,
"Percpu Memory Statistics\n"
"Allocation Info:\n"
"----------------------------------------\n");
PL(unit_size);
PL(static_size);
PL(reserved_size);
PL(dyn_size);
PL(atom_size);
PL(alloc_size);
seq_putc(m, '\n');
#undef PL
/* PU(X): print field X of the global counters as unsigned, labelled "X" */
#define PU(X) \
seq_printf(m, " %-18s: %14llu\n", #X, (unsigned long long)pcpu_stats.X)
seq_printf(m,
"Global Stats:\n"
"----------------------------------------\n");
PU(nr_alloc);
PU(nr_dealloc);
PU(nr_cur_alloc);
PU(nr_max_alloc);
PU(nr_chunks);
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
seq_putc(m, '\n');
#undef PU
seq_printf(m,
"Per Chunk Stats:\n"
"----------------------------------------\n");
/* the reserved chunk is reached through its own pointer, not the slot walk */
if (pcpu_reserved_chunk) {
seq_puts(m, "Chunk: <- Reserved Chunk\n");
chunk_map_stats(m, pcpu_reserved_chunk, buffer);
}
for (slot = 0; slot < pcpu_nr_slots; slot++) {
list_for_each_entry(chunk, &pcpu_slot[slot], list) {
/* only the heading differs for the first chunk */
if (chunk == pcpu_first_chunk) {
seq_puts(m, "Chunk: <- First Chunk\n");
chunk_map_stats(m, chunk, buffer);
} else {
seq_puts(m, "Chunk:\n");
chunk_map_stats(m, chunk, buffer);
}
}
}
spin_unlock_irq(&pcpu_lock);
vfree(buffer);
return 0;
}
/*
 * percpu_stats_open - open handler for the percpu_stats file
 * @inode: not used by this function
 * @filp: file being opened
 *
 * Hooks percpu_stats_show() up through single_open() with no private data
 * and returns whatever single_open() returns.
 */
static int percpu_stats_open(struct inode *inode, struct file *filp)
{
return single_open(filp, percpu_stats_show, NULL);
}
/*
 * File operations for the percpu_stats debugfs file: opening goes through
 * percpu_stats_open(), everything else uses the stock seq_file helpers,
 * with single_release() pairing the single_open() done at open time.
 */
static const struct file_operations percpu_stats_fops = {
.open = percpu_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/*
 * init_percpu_stats_debugfs - register the "percpu_stats" debugfs file
 *
 * Creates the file with mode 0444, a NULL parent and NULL data, backed by
 * percpu_stats_fops.  The result of debugfs_create_file() is not checked;
 * this function always returns 0.
 */
static int __init init_percpu_stats_debugfs(void)
{
debugfs_create_file("percpu_stats", 0444, NULL, NULL,
&percpu_stats_fops);
return 0;
}
late_initcall(init_percpu_stats_debugfs);
...@@ -343,12 +343,22 @@ static struct pcpu_chunk *pcpu_create_chunk(void) ...@@ -343,12 +343,22 @@ static struct pcpu_chunk *pcpu_create_chunk(void)
chunk->data = vms; chunk->data = vms;
chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
pcpu_stats_chunk_alloc();
trace_percpu_create_chunk(chunk->base_addr);
return chunk; return chunk;
} }
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{ {
if (chunk && chunk->data) if (!chunk)
return;
pcpu_stats_chunk_dealloc();
trace_percpu_destroy_chunk(chunk->base_addr);
if (chunk->data)
pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
pcpu_free_chunk(chunk); pcpu_free_chunk(chunk);
} }
......
...@@ -76,6 +76,11 @@ ...@@ -76,6 +76,11 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/io.h> #include <asm/io.h>
#define CREATE_TRACE_POINTS
#include <trace/events/percpu.h>
#include "percpu-internal.h"
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
#define PCPU_ATOMIC_MAP_MARGIN_LOW 32 #define PCPU_ATOMIC_MAP_MARGIN_LOW 32
...@@ -103,53 +108,35 @@ ...@@ -103,53 +108,35 @@
#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) #define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr)
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
struct pcpu_chunk { static int pcpu_unit_pages __ro_after_init;
struct list_head list; /* linked to pcpu_slot lists */ static int pcpu_unit_size __ro_after_init;
int free_size; /* free bytes in the chunk */ static int pcpu_nr_units __ro_after_init;
int contig_hint; /* max contiguous size hint */ static int pcpu_atom_size __ro_after_init;
void *base_addr; /* base address of this chunk */ int pcpu_nr_slots __ro_after_init;
static size_t pcpu_chunk_struct_size __ro_after_init;
int map_used; /* # of map entries used before the sentry */
int map_alloc; /* # of map entries allocated */
int *map; /* allocation map */
struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
void *data; /* chunk data */
int first_free; /* no free below this */
bool immutable; /* no [de]population allowed */
int nr_populated; /* # of populated pages */
unsigned long populated[]; /* populated bitmap */
};
static int pcpu_unit_pages __read_mostly;
static int pcpu_unit_size __read_mostly;
static int pcpu_nr_units __read_mostly;
static int pcpu_atom_size __read_mostly;
static int pcpu_nr_slots __read_mostly;
static size_t pcpu_chunk_struct_size __read_mostly;
/* cpus with the lowest and highest unit addresses */ /* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __read_mostly; static unsigned int pcpu_low_unit_cpu __ro_after_init;
static unsigned int pcpu_high_unit_cpu __read_mostly; static unsigned int pcpu_high_unit_cpu __ro_after_init;
/* the address of the first chunk which starts with the kernel static area */ /* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __read_mostly; void *pcpu_base_addr __ro_after_init;
EXPORT_SYMBOL_GPL(pcpu_base_addr); EXPORT_SYMBOL_GPL(pcpu_base_addr);
static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ static const int *pcpu_unit_map __ro_after_init; /* cpu -> unit */
const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ const unsigned long *pcpu_unit_offsets __ro_after_init; /* cpu -> unit offset */
/* group information, used for vm allocation */ /* group information, used for vm allocation */
static int pcpu_nr_groups __read_mostly; static int pcpu_nr_groups __ro_after_init;
static const unsigned long *pcpu_group_offsets __read_mostly; static const unsigned long *pcpu_group_offsets __ro_after_init;
static const size_t *pcpu_group_sizes __read_mostly; static const size_t *pcpu_group_sizes __ro_after_init;
/* /*
* The first chunk which always exists. Note that unlike other * The first chunk which always exists. Note that unlike other
* chunks, this one can be allocated and mapped in several different * chunks, this one can be allocated and mapped in several different
* ways and thus often doesn't live in the vmalloc area. * ways and thus often doesn't live in the vmalloc area.
*/ */
static struct pcpu_chunk *pcpu_first_chunk; struct pcpu_chunk *pcpu_first_chunk __ro_after_init;
/* /*
* Optional reserved chunk. This chunk reserves part of the first * Optional reserved chunk. This chunk reserves part of the first
...@@ -158,13 +145,13 @@ static struct pcpu_chunk *pcpu_first_chunk; ...@@ -158,13 +145,13 @@ static struct pcpu_chunk *pcpu_first_chunk;
* area doesn't exist, the following variables contain NULL and 0 * area doesn't exist, the following variables contain NULL and 0
* respectively. * respectively.
*/ */
static struct pcpu_chunk *pcpu_reserved_chunk; struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;
static int pcpu_reserved_chunk_limit; static int pcpu_reserved_chunk_limit __ro_after_init;
static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */
/* chunks which need their map areas extended, protected by pcpu_lock */ /* chunks which need their map areas extended, protected by pcpu_lock */
static LIST_HEAD(pcpu_map_extend_chunks); static LIST_HEAD(pcpu_map_extend_chunks);
...@@ -672,6 +659,9 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, ...@@ -672,6 +659,9 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme,
int to_free = 0; int to_free = 0;
int *p; int *p;
lockdep_assert_held(&pcpu_lock);
pcpu_stats_area_dealloc(chunk);
freeme |= 1; /* we are searching for <given offset, in use> pair */ freeme |= 1; /* we are searching for <given offset, in use> pair */
i = 0; i = 0;
...@@ -735,6 +725,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) ...@@ -735,6 +725,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map[0] = 0; chunk->map[0] = 0;
chunk->map[1] = pcpu_unit_size | 1; chunk->map[1] = pcpu_unit_size | 1;
chunk->map_used = 1; chunk->map_used = 1;
chunk->has_reserved = false;
INIT_LIST_HEAD(&chunk->list); INIT_LIST_HEAD(&chunk->list);
INIT_LIST_HEAD(&chunk->map_extend_list); INIT_LIST_HEAD(&chunk->map_extend_list);
...@@ -965,8 +956,10 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -965,8 +956,10 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
* tasks to create chunks simultaneously. Serialize and create iff * tasks to create chunks simultaneously. Serialize and create iff
* there's still no empty chunk after grabbing the mutex. * there's still no empty chunk after grabbing the mutex.
*/ */
if (is_atomic) if (is_atomic) {
err = "atomic alloc failed, no space left";
goto fail; goto fail;
}
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
chunk = pcpu_create_chunk(); chunk = pcpu_create_chunk();
...@@ -984,6 +977,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -984,6 +977,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
goto restart; goto restart;
area_found: area_found:
pcpu_stats_area_alloc(chunk, size);
spin_unlock_irqrestore(&pcpu_lock, flags); spin_unlock_irqrestore(&pcpu_lock, flags);
/* populate if not all pages are already there */ /* populate if not all pages are already there */
...@@ -1026,11 +1020,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, ...@@ -1026,11 +1020,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
kmemleak_alloc_percpu(ptr, size, gfp); kmemleak_alloc_percpu(ptr, size, gfp);
trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
chunk->base_addr, off, ptr);
return ptr; return ptr;
fail_unlock: fail_unlock:
spin_unlock_irqrestore(&pcpu_lock, flags); spin_unlock_irqrestore(&pcpu_lock, flags);
fail: fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
if (!is_atomic && warn_limit) { if (!is_atomic && warn_limit) {
pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
size, align, is_atomic, err); size, align, is_atomic, err);
...@@ -1280,6 +1280,8 @@ void free_percpu(void __percpu *ptr) ...@@ -1280,6 +1280,8 @@ void free_percpu(void __percpu *ptr)
} }
} }
trace_percpu_free_percpu(chunk->base_addr, off, ptr);
spin_unlock_irqrestore(&pcpu_lock, flags); spin_unlock_irqrestore(&pcpu_lock, flags);
} }
EXPORT_SYMBOL_GPL(free_percpu); EXPORT_SYMBOL_GPL(free_percpu);
...@@ -1656,6 +1658,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1656,6 +1658,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);
pcpu_stats_save_ai(ai);
/* /*
* Allocate chunk slots. The additional last slot is for * Allocate chunk slots. The additional last slot is for
* empty chunks. * empty chunks.
...@@ -1699,6 +1703,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1699,6 +1703,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (schunk->free_size) if (schunk->free_size)
schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size; schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size;
schunk->map[schunk->map_used] |= 1; schunk->map[schunk->map_used] |= 1;
schunk->has_reserved = true;
/* init dynamic chunk if necessary */ /* init dynamic chunk if necessary */
if (dyn_size) { if (dyn_size) {
...@@ -1717,6 +1722,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1717,6 +1722,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
dchunk->map[1] = pcpu_reserved_chunk_limit; dchunk->map[1] = pcpu_reserved_chunk_limit;
dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
dchunk->map_used = 2; dchunk->map_used = 2;
dchunk->has_reserved = true;
} }
/* link the first chunk in */ /* link the first chunk in */
...@@ -1725,6 +1731,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, ...@@ -1725,6 +1731,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
pcpu_count_occupied_pages(pcpu_first_chunk, 1); pcpu_count_occupied_pages(pcpu_first_chunk, 1);
pcpu_chunk_relocate(pcpu_first_chunk, -1); pcpu_chunk_relocate(pcpu_first_chunk, -1);
pcpu_stats_chunk_alloc();
trace_percpu_create_chunk(base_addr);
/* we're done */ /* we're done */
pcpu_base_addr = base_addr; pcpu_base_addr = base_addr;
return 0; return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册