提交 c550033c 编写于 作者: I Ingo Molnar

Merge branch 'core/percpu' into x86/core

......@@ -64,6 +64,7 @@ SECTIONS
__initramfs_end = .;
#endif
. = ALIGN(4096);
__per_cpu_load = .;
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
......
......@@ -213,17 +213,9 @@ SECTIONS
{ *(.data.cacheline_aligned) }
/* Per-cpu data: */
percpu : { } :percpu
. = ALIGN(PERCPU_PAGE_SIZE);
__phys_per_cpu_start = .;
.data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
{
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
PERCPU_VADDR(PERCPU_ADDR, :percpu)
__phys_per_cpu_start = __per_cpu_load;
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
* into percpu page size
*/
......
......@@ -181,14 +181,7 @@ SECTIONS
__initramfs_end = .;
}
#endif
. = ALIGN(PAGE_SIZE);
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
__per_cpu_start = .;
*(.data.percpu.page_aligned)
*(.data.percpu)
*(.data.percpu.shared_aligned)
__per_cpu_end = .;
}
PERCPU(PAGE_SIZE)
. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
......
......@@ -43,14 +43,6 @@
#else /* ...!ASSEMBLY */
#include <linux/stringify.h>
#include <asm/sections.h>
#define __addr_to_pcpu_ptr(addr) \
(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ (unsigned long)__per_cpu_start)
#define __pcpu_ptr_to_addr(ptr) \
(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
- (unsigned long)__per_cpu_start)
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
......
......@@ -233,8 +233,8 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
"%zu bytes\n", vm.addr, static_size);
ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE,
PMD_SIZE, dyn_size, vm.addr, NULL);
PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
PMD_SIZE, vm.addr, NULL);
goto out_free_ar;
enomem:
......@@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
* Embedding allocator
*
* The first chunk is sized to just contain the static area plus
* module and dynamic reserves, and allocated as a contiguous area
* using bootmem allocator and used as-is without being mapped into
* vmalloc area. This enables the first chunk to piggy back on the
* linear physical PMD mapping and doesn't add any additional pressure
* to TLB. Note that if the needed size is smaller than the minimum
* unit size, the leftover is returned to the bootmem allocator.
* module and dynamic reserves and embedded into linear physical
* mapping so that it can use PMD mapping without additional TLB
* pressure.
*/
static void *pcpue_ptr __initdata;
static size_t pcpue_size __initdata;
static size_t pcpue_unit_size __initdata;
static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
if (off >= pcpue_size)
return NULL;
return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
}
static ssize_t __init setup_pcpu_embed(size_t static_size)
{
unsigned int cpu;
size_t dyn_size;
size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
/*
* If large page isn't supported, there's no benefit in doing
......@@ -291,33 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
if (!cpu_has_pse || pcpu_need_numa())
return -EINVAL;
/* allocate and copy */
pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
PERCPU_DYNAMIC_RESERVE);
pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
PAGE_SIZE);
if (!pcpue_ptr)
return -ENOMEM;
for_each_possible_cpu(cpu) {
void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
free_bootmem(__pa(ptr + pcpue_size),
pcpue_unit_size - pcpue_size);
memcpy(ptr, __per_cpu_load, static_size);
}
/* we're ready, commit */
pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
return pcpu_setup_first_chunk(pcpue_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE,
pcpue_unit_size, dyn_size,
pcpue_ptr, NULL);
return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
}
/*
......@@ -375,8 +332,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
pcpu4k_nr_static_pages, static_size);
ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL,
pcpu4k_populate_pte);
PERCPU_FIRST_CHUNK_RESERVE, -1,
-1, NULL, pcpu4k_populate_pte);
goto out_free_ar;
enomem:
......
......@@ -107,10 +107,14 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr);
extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size,
ssize_t unit_size, ssize_t dyn_size,
ssize_t dyn_size, ssize_t unit_size,
void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn);
extern ssize_t __init pcpu_embed_first_chunk(
size_t static_size, size_t reserved_size,
ssize_t dyn_size, ssize_t unit_size);
/*
* Use this to get to a cpu's version of the per-cpu object
* dynamically allocated. Non-atomic access to the current CPU's
......
......@@ -9599,10 +9599,11 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
cpu = task_cpu(tsk);
ca = task_ca(tsk);
for (; ca; ca = ca->parent) {
do {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
ca = ca->parent;
} while (ca);
}
struct cgroup_subsys cpuacct_subsys = {
......
......@@ -120,7 +120,7 @@ void *__alloc_percpu(size_t size, size_t align)
* on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used.
*/
WARN_ON_ONCE(align > __alignof__(unsigned long long));
WARN_ON_ONCE(align > SMP_CACHE_BYTES);
if (unlikely(!pdata))
return NULL;
......
......@@ -46,7 +46,8 @@
* - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
*
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
* regular address to percpu pointer and back
* regular address to percpu pointer and back if they need to be
* different from the default
*
* - use pcpu_setup_first_chunk() during percpu area initialization to
* setup the first chunk containing the kernel static percpu area
......@@ -67,11 +68,24 @@
#include <linux/workqueue.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr) \
(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr) \
(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
- (unsigned long)__per_cpu_start)
#endif
struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */
struct rb_node rb_node; /* key is chunk->vm->addr */
......@@ -1013,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu);
* @get_page_fn: callback to fetch page pointer
* @static_size: the size of static percpu area in bytes
* @reserved_size: the size of reserved percpu area in bytes
* @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @dyn_size: free size for dynamic allocation in bytes, -1 for auto
* @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
* @base_addr: mapped address, NULL for auto
* @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary
*
......@@ -1039,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
* limited offset range for symbol relocations to guarantee module
* percpu symbols fall inside the relocatable range.
*
* @dyn_size, if non-negative, determines the number of bytes
* available for dynamic allocation in the first chunk. Specifying
* non-negative value makes percpu leave alone the area beyond
* @static_size + @reserved_size + @dyn_size.
*
* @unit_size, if non-negative, specifies unit size and must be
* aligned to PAGE_SIZE and equal to or larger than @static_size +
* @reserved_size + @dyn_size.
*
* @dyn_size, if non-negative, limits the number of bytes available
* for dynamic allocation in the first chunk. Specifying non-negative
* value make percpu leave alone the area beyond @static_size +
* @reserved_size + @dyn_size.
* @reserved_size + if non-negative, @dyn_size.
*
* Non-null @base_addr means that the caller already allocated virtual
* region for the first chunk and mapped it. percpu must not mess
......@@ -1069,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu);
*/
size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t reserved_size,
ssize_t unit_size, ssize_t dyn_size,
ssize_t dyn_size, ssize_t unit_size,
void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn)
{
static struct vm_struct first_vm;
static int smap[2], dmap[2];
size_t size_sum = static_size + reserved_size +
(dyn_size >= 0 ? dyn_size : 0);
struct pcpu_chunk *schunk, *dchunk = NULL;
unsigned int cpu;
int nr_pages;
......@@ -1085,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
BUG_ON(!static_size);
if (unit_size >= 0) {
BUG_ON(unit_size < static_size + reserved_size +
(dyn_size >= 0 ? dyn_size : 0));
BUG_ON(unit_size < size_sum);
BUG_ON(unit_size & ~PAGE_MASK);
} else {
BUG_ON(dyn_size >= 0);
BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE);
} else
BUG_ON(base_addr);
}
BUG_ON(base_addr && populate_pte_fn);
if (unit_size >= 0)
pcpu_unit_pages = unit_size >> PAGE_SHIFT;
else
pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT,
PFN_UP(static_size + reserved_size));
PFN_UP(size_sum));
pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
......@@ -1224,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
return pcpu_unit_size;
}
/*
* Embedding first chunk setup helper.
*/
static void *pcpue_ptr __initdata;
static size_t pcpue_size __initdata;
static size_t pcpue_unit_size __initdata;
static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
{
size_t off = (size_t)pageno << PAGE_SHIFT;
if (off >= pcpue_size)
return NULL;
return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
}
/**
* pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
* @static_size: the size of static percpu area in bytes
* @reserved_size: the size of reserved percpu area in bytes
* @dyn_size: free size for dynamic allocation in bytes, -1 for auto
* @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto
*
* This is a helper to ease setting up embedded first percpu chunk and
* can be called where pcpu_setup_first_chunk() is expected.
*
* If this function is used to setup the first chunk, it is allocated
* as a contiguous area using bootmem allocator and used as-is without
* being mapped into vmalloc area. This enables the first chunk to
* piggy back on the linear physical mapping which often uses larger
* page size.
*
* When @dyn_size is positive, dynamic area might be larger than
* specified to fill page alignment. Also, when @dyn_size is auto,
* @dyn_size does not fill the whole first chunk but only what's
* necessary for page alignment after static and reserved areas.
*
* If the needed size is smaller than the minimum or specified unit
* size, the leftover is returned to the bootmem allocator.
*
* RETURNS:
* The determined pcpu_unit_size which can be used to initialize
* percpu access on success, -errno on failure.
*/
ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
ssize_t dyn_size, ssize_t unit_size)
{
unsigned int cpu;
/* determine parameters and allocate */
pcpue_size = PFN_ALIGN(static_size + reserved_size +
(dyn_size >= 0 ? dyn_size : 0));
if (dyn_size != 0)
dyn_size = pcpue_size - static_size - reserved_size;
if (unit_size >= 0) {
BUG_ON(unit_size < pcpue_size);
pcpue_unit_size = unit_size;
} else
pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
pcpue_ptr = __alloc_bootmem_nopanic(
num_possible_cpus() * pcpue_unit_size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
if (!pcpue_ptr)
return -ENOMEM;
/* return the leftover and copy */
for_each_possible_cpu(cpu) {
void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
free_bootmem(__pa(ptr + pcpue_size),
pcpue_unit_size - pcpue_size);
memcpy(ptr, __per_cpu_load, static_size);
}
/* we're ready, commit */
pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
return pcpu_setup_first_chunk(pcpue_get_page, static_size,
reserved_size, dyn_size,
pcpue_unit_size, pcpue_ptr, NULL);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册