提交 99937d64 编写于 作者: I Ingo Molnar

Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

......@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
SWAPGS_UNSAFE_STACK
movq %gs:pda_kernelstack, %rsp
addq $(PDA_STACKOFFSET),%rsp
movq PER_CPU_VAR(kernel_stack), %rsp
addq $(KERNEL_STACK_OFFSET),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs, here we enable it straight after entry:
......@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq %gs:pda_kernelstack,%rsp
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
......
#ifndef _ASM_X86_CURRENT_H
#define _ASM_X86_CURRENT_H
#ifdef CONFIG_X86_32
#include <linux/compiler.h>
#include <asm/percpu.h>
#ifndef __ASSEMBLY__
struct task_struct;
DECLARE_PER_CPU(struct task_struct *, current_task);
/*
 * get_current - return the task currently running on this CPU.
 *
 * The pointer is fetched from the per-cpu variable current_task with
 * percpu_read().
 */
static __always_inline struct task_struct *get_current(void)
{
	struct task_struct *tsk = percpu_read(current_task);

	return tsk;
}
#else /* X86_32 */
#ifndef __ASSEMBLY__
#include <asm/pda.h>
struct task_struct;
/*
 * get_current - return the task currently running on this CPU.
 *
 * The running task is tracked in the per-cpu variable current_task (the
 * context-switch path stores the incoming task there), so a single
 * percpu_read() is the whole implementation.  Exactly one return statement
 * is kept: a second return after the first can never execute.
 */
static __always_inline struct task_struct *get_current(void)
{
	return percpu_read(current_task);
}
#else /* __ASSEMBLY__ */
#include <asm/asm-offsets.h>
#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
#define current get_current()
#endif /* __ASSEMBLY__ */
#endif /* X86_32 */
#define current get_current()
#endif /* _ASM_X86_CURRENT_H */
......@@ -3,22 +3,36 @@
#include <linux/threads.h>
#include <linux/irq.h>
#include <asm/pda.h>
#include <asm/apic.h>
/*
 * Per-CPU interrupt statistics.  One instance per CPU is declared as the
 * per-cpu variable irq_stat; individual counters are incremented through
 * inc_irq_stat(member).
 */
typedef struct {
/* read and updated through local_softirq_pending(), set_softirq_pending()
 * and or_softirq_pending() */
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
unsigned int apic_timer_irqs; /* arch dependent */
/* summed with apic_timer_irqs by get_timer_irqs() */
unsigned int irq0_irqs;
/* incremented by the Xen reschedule interrupt handler */
unsigned int irq_resched_count;
/* incremented by the Xen call-function interrupt handlers */
unsigned int irq_call_count;
unsigned int irq_tlb_count;
unsigned int irq_thermal_count;
unsigned int irq_spurious_count;
unsigned int irq_threshold_count;
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
#define __ARCH_IRQ_STAT 1

/*
 * Accessors for this CPU's irq_stat instance.  Each macro is defined exactly
 * once: defining the same macro a second time with a different replacement
 * list is a constraint violation, and only the last definition would take
 * effect anyway.
 */

/* Increment one counter (a member of irq_cpustat_t) of this CPU by one. */
#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)

/* Read this CPU's pending-softirq mask. */
#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)

#define __ARCH_SET_SOFTIRQ_PENDING 1

/* Overwrite, respectively OR bits into, this CPU's pending-softirq mask. */
#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
......
#ifndef _ASM_X86_MMU_CONTEXT_64_H
#define _ASM_X86_MMU_CONTEXT_64_H
#include <asm/pda.h>
/*
 * enter_lazy_tlb - mark this CPU as being in lazy TLB mode.
 * @mm:  not used by this implementation
 * @tsk: not used by this implementation
 *
 * With CONFIG_SMP, moves this CPU's cpu_tlbstate.state from TLBSTATE_OK to
 * TLBSTATE_LAZY; any other state is left alone.  Without CONFIG_SMP this is
 * a no-op.
 *
 * The state is kept in one place only (per-cpu cpu_tlbstate, the same copy
 * reset_lazy_tlbstate() resets), so the transition is applied once rather
 * than mirrored into a second, PDA-based copy.
 */
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
......@@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
write_pda(mmu_state, TLBSTATE_OK);
write_pda(active_mm, next);
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
load_cr3(next->pgd);
......@@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
#ifdef CONFIG_SMP
else {
write_pda(mmu_state, TLBSTATE_OK);
if (read_pda(active_mm) != next)
BUG();
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
......
......@@ -13,8 +13,8 @@
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
#define IRQSTACK_ORDER 2
#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
#define IRQ_STACK_ORDER 2
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
#define STACKFAULT_STACK 1
#define DOUBLEFAULT_STACK 2
......
......@@ -11,33 +11,18 @@
/* Per processor datastructure. %gs points to it while the kernel runs */
/*
 * NOTE(review): this listing carries both the named leading fields
 * (pcurrent ... cpunumber, 40 bytes on LP64) and the unused1..unused6
 * placeholders (another 40 bytes).  stack_canary can only sit at offset 40,
 * as the comment on it requires, if exactly one of those two sets is
 * present -- confirm which set is current before relying on the offsets
 * quoted in the field comments.
 */
struct x8664_pda {
struct task_struct *pcurrent; /* 0 Current process */
unsigned long dummy;
unsigned long kernelstack; /* 16 top of kernel stack for current */
unsigned long oldrsp; /* 24 user rsp for system call */
int irqcount; /* 32 Irq nesting counter. Starts -1 */
unsigned int cpunumber; /* 36 Logical CPU number */
unsigned long unused1;
unsigned long unused2;
unsigned long unused3;
unsigned long unused4;
int unused5;
unsigned int unused6; /* 36 was cpunumber */
#ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
/* gcc-ABI: this canary MUST be at
offset 40!!! */
#endif
char *irqstackptr;
short nodenumber; /* number of current node (32k max) */
short in_bootmem; /* pda lives in bootmem */
unsigned int __softirq_pending;
unsigned int __nmi_count; /* number of NMI on this CPUs */
short mmu_state;
short isidle;
struct mm_struct *active_mm;
unsigned apic_timer_irqs;
unsigned irq0_irqs;
unsigned irq_resched_count;
unsigned irq_call_count;
unsigned irq_tlb_count;
unsigned irq_thermal_count;
unsigned irq_threshold_count;
unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;
DECLARE_PER_CPU(struct x8664_pda, __pda);
......@@ -57,6 +42,4 @@ extern void pda_init(int);
#endif
#define PDA_STACKOFFSET (5*8)
#endif /* _ASM_X86_PDA_H */
......@@ -39,10 +39,10 @@
#include <linux/stringify.h>
#ifdef CONFIG_SMP
#define __percpu_seg_str "%%"__stringify(__percpu_seg)":"
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
#else
#define __percpu_seg_str
#define __percpu_arg(x) "%" #x
#endif
/* For arch-specific code, we can use direct single-insn ops (they
......@@ -58,22 +58,22 @@ do { \
} \
switch (sizeof(var)) { \
case 1: \
asm(op "b %1,"__percpu_seg_str"%0" \
asm(op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 2: \
asm(op "w %1,"__percpu_seg_str"%0" \
asm(op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 4: \
asm(op "l %1,"__percpu_seg_str"%0" \
asm(op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
case 8: \
asm(op "q %1,"__percpu_seg_str"%0" \
asm(op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "r" ((T__)val)); \
break; \
......@@ -86,22 +86,22 @@ do { \
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_seg_str"%1,%0" \
asm(op "b "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 2: \
asm(op "w "__percpu_seg_str"%1,%0" \
asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 4: \
asm(op "l "__percpu_seg_str"%1,%0" \
asm(op "l "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
case 8: \
asm(op "q "__percpu_seg_str"%1,%0" \
asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \
: "m" (var)); \
break; \
......@@ -122,9 +122,9 @@ do { \
/*
 * x86_test_and_clear_bit_percpu - atomically (w.r.t. this CPU) clear a bit in
 * a per-cpu variable and report its previous value.
 * @bit: bit number to clear
 * @var: per-cpu variable name, without the per_cpu__ prefix
 *
 * Evaluates to non-zero (-1) if the bit was set and 0 if it was already
 * clear: btr leaves the old bit in CF and "sbbl %0,%0" turns CF into 0 / -1.
 *
 * The variable is passed as a "+m" operand so the compiler knows it is both
 * read and written.  The btr/sbb pair must be issued exactly once: running it
 * a second time would always observe the bit already cleared and overwrite
 * the result with 0.
 */
#define x86_test_and_clear_bit_percpu(bit, var)				\
({									\
	int old__;							\
	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
		     : "=r" (old__), "+m" (per_cpu__##var)		\
		     : "dIr" (bit));					\
	old__;								\
})
......
......@@ -378,6 +378,9 @@ union thread_xstate {
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
DECLARE_PER_CPU(char *, irq_stack_ptr);
#endif
extern void print_cpu_info(struct cpuinfo_x86 *);
......
......@@ -25,9 +25,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(int, cpu_number);
#endif
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
......@@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata;
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
#define raw_smp_processor_id() read_pda(cpunumber)
#define raw_smp_processor_id() (percpu_read(cpu_number))
#define stack_smp_processor_id() \
({ \
......
......@@ -94,7 +94,7 @@ do { \
"call __switch_to\n\t" \
".globl thread_return\n" \
"thread_return:\n\t" \
"movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
"movq %%rax,%%rdi\n\t" \
......@@ -106,7 +106,7 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
[pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
[current_task] "m" (per_cpu_var(current_task)) \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
......
......@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
#else /* X86_32 */
#include <asm/pda.h>
#include <asm/percpu.h>
#define KERNEL_STACK_OFFSET (5*8)
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
return ti;
}
DECLARE_PER_CPU(unsigned long, kernel_stack);
/* do not use in interrupt context */
static inline struct thread_info *stack_thread_info(void)
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
ti = (void *)(percpu_read(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
......@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
/* how to get the thread information struct from ASM */
/*
 * GET_THREAD_INFO(reg) - load the current thread_info pointer into reg.
 *
 * Reads the per-cpu kernel_stack value and subtracts
 * (THREAD_SIZE - KERNEL_STACK_OFFSET), the same arithmetic the C helper
 * current_thread_info() performs.  Both instructions are part of the macro
 * body; nothing may follow the last continuation line.
 */
#define GET_THREAD_INFO(reg) \
	movq PER_CPU_VAR(kernel_stack),reg ; \
	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
#endif
......
......@@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
#ifdef CONFIG_X86_32
struct tlb_state {
struct mm_struct *active_mm;
int state;
char __cacheline_padding[L1_CACHE_BYTES-8];
};
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
void reset_lazy_tlbstate(void);
#else
/*
 * Reset this CPU's lazy-TLB bookkeeping: cpu_tlbstate.state is set to 0 and
 * cpu_tlbstate.active_mm is pointed back at init_mm.
 */
static inline void reset_lazy_tlbstate(void)
{
percpu_write(cpu_tlbstate.state, 0);
percpu_write(cpu_tlbstate.active_mm, &init_mm);
}
#endif
#endif /* SMP */
......
......@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
/*
 * Returns the number of the current Node.
 *
 * The value comes from the per-cpu variable node_number, which
 * numa_set_node() fills in.  The macro is defined exactly once.
 */
DECLARE_PER_CPU(int, node_number);
#define numa_node_id()		percpu_read(node_number)
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
extern int cpu_to_node(int cpu);
......
......@@ -49,12 +49,6 @@ int main(void)
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(pcurrent);
ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
DEFINE(pda_size, sizeof(struct x8664_pda));
BLANK();
#undef ENTRY
......
......@@ -881,47 +881,32 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
#ifdef CONFIG_SMP
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
#else
DEFINE_PER_CPU(char *, irq_stack_ptr) =
per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
#endif
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/*
 * pda_init - initialise the PDA of one CPU.
 * @cpu: logical CPU number whose PDA is set up
 *
 * Clears %fs/%gs, loads the PDA offset for @cpu and fills in the basic PDA
 * fields.  CPU 0 uses the statically allocated boot_cpu_stack as its IRQ
 * stack; every other CPU gets pages from __get_free_pages() unless an IRQ
 * stack pointer is already present.  Panics if that allocation fails.
 */
void __cpuinit pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
/* Set up data that may be needed in __get_free_pages early */
loadsegment(fs, 0);
loadsegment(gs, 0);
load_pda_offset(cpu);
pda->cpunumber = cpu;
/* -1 matches the "Starts -1" note on the irqcount field */
pda->irqcount = -1;
/* derived from the stack we are running on right now */
pda->kernelstack = (unsigned long)stack_thread_info() -
PDA_STACKOFFSET + THREAD_SIZE;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
if (cpu == 0) {
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
pda->irqstackptr = boot_cpu_stack;
/* point 64 bytes below the end of the IRQ stack area */
pda->irqstackptr += IRQSTACKSIZE - 64;
} else {
/* allocate only if no IRQ stack pointer has been recorded yet */
if (!pda->irqstackptr) {
pda->irqstackptr = (char *)
__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
if (!pda->irqstackptr)
panic("cannot allocate irqstack for cpu %d",
cpu);
pda->irqstackptr += IRQSTACKSIZE - 64;
}
/* only overwrite a node number of 0, and only with a valid node */
if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
pda->nodenumber = cpu_to_node(cpu);
}
}
static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
DEBUG_STKSZ] __page_aligned_bss;
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
extern asmlinkage void ignore_sysret(void);
......@@ -979,15 +964,18 @@ void __cpuinit cpu_init(void)
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
char *estacks = NULL;
struct task_struct *me;
int i;
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
pda_init(cpu);
else
estacks = boot_exception_stacks;
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
cpu_to_node(cpu) != NUMA_NO_NODE)
percpu_write(node_number, cpu_to_node(cpu));
#endif
me = current;
......@@ -1021,18 +1009,13 @@ void __cpuinit cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
static const unsigned int order[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
[DEBUG_STACK - 1] = DEBUG_STACK_ORDER
static const unsigned int sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
if (cpu) {
estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
if (!estacks)
panic("Cannot allocate exception "
"stack %ld %d\n", v, cpu);
}
estacks += PAGE_SIZE << order[v];
estacks += sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
......
......@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned long *irq_stack_end =
(unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
......@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
stack = (unsigned long *) estack_end[-2];
continue;
}
if (irqstack_end) {
unsigned long *irqstack;
irqstack = irqstack_end -
(IRQSTACKSIZE - 64) / sizeof(*irqstack);
if (irq_stack_end) {
unsigned long *irq_stack;
irq_stack = irq_stack_end -
(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
if (stack >= irqstack && stack < irqstack_end) {
if (stack >= irq_stack && stack < irq_stack_end) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
ops, data, irqstack_end, &graph);
ops, data, irq_stack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (irqstack_end[-1]);
irqstack_end = NULL;
stack = (unsigned long *) (irq_stack_end[-1]);
irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
}
......@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
unsigned long *irqstack_end =
(unsigned long *) (cpu_pda(cpu)->irqstackptr);
unsigned long *irqstack =
(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
unsigned long *irq_stack_end =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu));
unsigned long *irq_stack =
(unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
/*
* debugging aid: "show_stack(NULL, NULL);" prints the
......@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
stack = sp;
for (i = 0; i < kstack_depth_to_print; i++) {
if (stack >= irqstack && stack <= irqstack_end) {
if (stack == irqstack_end) {
stack = (unsigned long *) (irqstack_end[-1]);
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
printk(" <EOI> ");
}
} else {
......@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
int i;
unsigned long sp;
const int cpu = smp_processor_id();
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
struct task_struct *cur = current;
sp = regs->sp;
printk("CPU %d ", cpu);
......
......@@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64)
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp offset=0
movq %gs:pda_oldrsp,\tmp
movq PER_CPU_VAR(old_rsp),\tmp
movq \tmp,RSP+\offset(%rsp)
movq $__USER_DS,SS+\offset(%rsp)
movq $__USER_CS,CS+\offset(%rsp)
......@@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64)
.macro RESTORE_TOP_OF_STACK tmp offset=0
movq RSP+\offset(%rsp),\tmp
movq \tmp,%gs:pda_oldrsp
movq \tmp,PER_CPU_VAR(old_rsp)
movq EFLAGS+\offset(%rsp),\tmp
movq \tmp,R11+\offset(%rsp)
.endm
......@@ -337,15 +337,15 @@ ENTRY(save_args)
je 1f
SWAPGS
/*
* irqcount is used to check if a CPU is already on an interrupt stack
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
1: incl %gs:pda_irqcount
1: incl PER_CPU_VAR(irq_count)
jne 2f
popq_cfi %rax /* move return address... */
mov %gs:pda_irqstackptr,%rsp
mov PER_CPU_VAR(irq_stack_ptr),%rsp
EMPTY_FRAME 0
pushq_cfi %rax /* ... to the new stack */
/*
......@@ -468,7 +468,7 @@ END(ret_from_fork)
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,PDA_STACKOFFSET
CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SWAPGS_UNSAFE_STACK
......@@ -479,8 +479,8 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
movq %rsp,PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
/*
* No need to follow this irqs off/on section - it's straight
* and short:
......@@ -523,7 +523,7 @@ sysret_check:
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,-ARG_SKIP,1
/*CFI_REGISTER rflags,r11*/
movq %gs:pda_oldrsp, %rsp
movq PER_CPU_VAR(old_rsp), %rsp
USERGS_SYSRET64
CFI_RESTORE_STATE
......@@ -833,11 +833,11 @@ common_interrupt:
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
/* 0(%rsp): old_rsp-ARGOFFSET */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
......@@ -1260,14 +1260,14 @@ ENTRY(call_softirq)
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
incl %gs:pda_irqcount
cmove %gs:pda_irqstackptr,%rsp
incl PER_CPU_VAR(irq_count)
cmove PER_CPU_VAR(irq_stack_ptr),%rsp
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
END(call_softirq)
......@@ -1297,15 +1297,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
DEFAULT_FRAME
11: incl %gs:pda_irqcount
11: incl PER_CPU_VAR(irq_count)
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
cmovzq %gs:pda_irqstackptr,%rsp
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
pushq %rbp # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
END(do_hypervisor_callback)
......
......@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
#endif
}
/*
 * irq_stats(x) - pointer to the irq_stat counters of CPU x.
 *
 * A single unconditional definition: irq_stat is a per-cpu variable on every
 * configuration, so no per-architecture variant (and no later redefinition of
 * the same macro) is needed.
 */
#define irq_stats(x)		(&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing:
*/
......
......@@ -19,6 +19,9 @@
#include <asm/io_apic.h>
#include <asm/idle.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
/*
* Probabilistic stack overflow check:
*
......
......@@ -61,11 +61,7 @@ static int endflag __initdata;
/*
 * get_nmi_count - NMI counter of a given CPU.
 * @cpu: logical CPU number
 *
 * Returns the __nmi_count member of @cpu's irq_stat instance.  One return
 * path for every configuration; a return placed after an #ifdef/#else pair
 * that already returns on both branches would be unreachable.
 */
static inline unsigned int get_nmi_count(int cpu)
{
	return per_cpu(irq_stat, cpu).__nmi_count;
}
static inline int mce_in_progress(void)
......@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
*/
/*
 * get_timer_irqs - number of timer interrupts seen by a given CPU.
 * @cpu: logical CPU number to query
 *
 * Returns apic_timer_irqs + irq0_irqs from @cpu's irq_stat instance.  The
 * counters are always looked up with per_cpu(..., cpu): a variant that reads
 * the *local* CPU's counters would silently ignore @cpu and report the
 * caller's own numbers for every CPU it asks about.
 */
static inline unsigned int get_timer_irqs(int cpu)
{
	return per_cpu(irq_stat, cpu).apic_timer_irqs +
		per_cpu(irq_stat, cpu).irq0_irqs;
}
#ifdef CONFIG_SMP
......
......@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
/*
* Return saved PC of a blocked thread.
*/
......
......@@ -57,6 +57,12 @@
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
......@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
/*
 * enter_idle - mark this CPU idle and notify listeners.
 *
 * Sets the per-cpu is_idle flag and then runs the idle notifier chain with
 * IDLE_START.  The flag lives in exactly one place (is_idle), which is the
 * one __exit_idle() tests and clears.
 */
void enter_idle(void)
{
	percpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
/*
 * __exit_idle - leave the idle state if this CPU was marked idle.
 *
 * Tests and clears bit 0 of the per-cpu is_idle flag.  If the flag was not
 * set there is nothing to undo; otherwise the idle notifier chain is run
 * with IDLE_END.
 *
 * A single test-and-clear decides the outcome.  Chaining a second, nested
 * test on another copy of the flag would leave one copy set whenever the
 * first test succeeds, so a later call would deliver IDLE_END again for the
 * same idle period.
 */
static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
......@@ -392,7 +398,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
write_pda(oldrsp, new_sp);
percpu_write(old_rsp, new_sp);
regs->cs = __USER_CS;
regs->ss = __USER_DS;
regs->flags = 0x200;
......@@ -613,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
write_pda(pcurrent, next_p);
prev->usersp = percpu_read(old_rsp);
percpu_write(old_rsp, next->usersp);
percpu_write(current_task, next_p);
write_pda(kernelstack,
percpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - PDA_STACKOFFSET);
THREAD_SIZE - KERNEL_STACK_OFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
write_pda(stack_canary, next_p->stack_canary);
/*
......
......@@ -22,6 +22,15 @@
# define DBG(x...)
#endif
/*
* Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
* voyager wants cpu_number too.
*/
#ifdef CONFIG_SMP
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
......@@ -44,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
#define X86_64_NUMA 1 /* (used later) */
DEFINE_PER_CPU(int, node_number) = 0;
EXPORT_PER_CPU_SYMBOL(node_number);
/*
* Map cpu index to node index
......@@ -192,7 +203,11 @@ void __init setup_per_cpu_areas(void)
memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
per_cpu_offset(cpu) = ptr - __per_cpu_start;
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
/*
* CPU0 modified pda in the init data area, reload pda
* offset for CPU0 and clear the area for others.
......@@ -202,7 +217,6 @@ void __init setup_per_cpu_areas(void)
else
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
#endif
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
}
......@@ -271,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node)
per_cpu(x86_cpu_to_node_map, cpu) = node;
if (node != NUMA_NO_NODE)
cpu_pda(cpu)->nodenumber = node;
per_cpu(node_number, cpu) = node;
}
void __cpuinit numa_clear_node(int cpu)
......
......@@ -790,15 +790,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
set_idle_for_cpu(cpu, c_idle.idle);
do_rest:
#ifdef CONFIG_X86_32
per_cpu(current_task, cpu) = c_idle.idle;
#ifdef CONFIG_X86_32
init_gdt(cpu);
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
cpu_pda(cpu)->pcurrent = c_idle.idle;
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(c_idle.idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
......
......@@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu)
write_gdt_entry(get_cpu_gdt_table(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
per_cpu(cpu_number, cpu) = cpu;
}
#endif
......@@ -4,8 +4,8 @@
#include <asm/tlbflush.h>
/*
 * Per-CPU TLB state, defined once.  Members are initialised positionally:
 * active_mm = &init_mm, state = 0 (order as in struct tlb_state).  Two
 * initialised definitions of the same per-cpu variable would be a
 * redefinition.
 */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
			= { &init_mm, 0, };
/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>
......@@ -231,14 +231,6 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
void reset_lazy_tlbstate(void)
{
int cpu = raw_smp_processor_id();
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}
static int init_flush_cpumask(void)
{
alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
......
......@@ -18,6 +18,9 @@
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
/*
 * Per-CPU TLB state.  The positional initialiser sets the first member to
 * &init_mm and the second to 0 (active_mm and state, going by the member
 * order of struct tlb_state).
 */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
......@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
*/
/*
 * leave_mm - drop this CPU out of the mm it is lazily holding.
 * @cpu: CPU number to clear from the mm's cpu_vm_mask
 *
 * Must not be called while the TLB state is TLBSTATE_OK; that is treated as
 * a bug.  Clears @cpu from the active mm's cpu_vm_mask and switches to the
 * swapper page tables.
 *
 * The state check stands on its own: nesting it under a second condition
 * would let the BUG() be skipped whenever that other condition is false.
 */
void leave_mm(int cpu)
{
	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
		BUG();
	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
......@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
* BUG();
*/
if (f->flush_mm == read_pda(active_mm)) {
if (read_pda(mmu_state) == TLBSTATE_OK) {
if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
......@@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info)
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
if (read_pda(mmu_state) == TLBSTATE_LAZY)
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
......
......@@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info)
struct mm_struct *mm = info;
struct mm_struct *active_mm;
#ifdef CONFIG_X86_64
active_mm = read_pda(active_mm);
#else
active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
#endif
active_mm = percpu_read(cpu_tlbstate.active_mm);
if (active_mm == mm)
leave_mm(smp_processor_id());
......
......@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
*/
/*
 * xen_reschedule_interrupt - handler for the Xen reschedule event.
 * @irq:    interrupt number (unused)
 * @dev_id: cookie passed at registration (unused)
 *
 * Only accounts the event; returns IRQ_HANDLED.  The counter is bumped
 * exactly once through inc_irq_stat(); an additional open-coded increment
 * before it would count every reschedule interrupt twice.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
	inc_irq_stat(irq_resched_count);

	return IRQ_HANDLED;
}
......@@ -283,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;
per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
init_gdt(cpu);
per_cpu(current_task, cpu) = idle;
irq_ctx_init(cpu);
#else
cpu_pda(cpu)->pcurrent = idle;
clear_tsk_thread_flag(idle, TIF_FORK);
#endif
xen_setup_timer(cpu);
......@@ -435,11 +430,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
......@@ -449,11 +440,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
irq_enter();
generic_smp_call_function_single_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
inc_irq_stat(irq_call_count);
irq_exit();
return IRQ_HANDLED;
......
......@@ -17,6 +17,7 @@
#include <asm/processor-flags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/percpu.h>
#include <xen/interface/xen.h>
......@@ -28,12 +29,10 @@
#if 1
/*
x86-64 does not yet support direct access to percpu variables
via a segment override, so we just need to make sure this code
never gets used
FIXME: x86_64 now can support direct access to percpu variables
via a segment override. Update xen accordingly.
*/
#define BUG ud2a
#define PER_CPU_VAR(var, off) 0xdeadbeef
#endif
/*
......@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
BUG
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/* Test for pending */
testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f
2: call check_events
......@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
ENTRY(xen_irq_disable_direct)
BUG
movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ENDPATCH(xen_irq_disable_direct)
ret
ENDPROC(xen_irq_disable_direct)
......@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
ENTRY(xen_save_fl_direct)
BUG
testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah,%ah
ENDPATCH(xen_save_fl_direct)
......@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
BUG
testb $X86_EFLAGS_IF>>8, %ah
setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
jz 1f
2: call check_events
1:
......@@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
ENTRY(xen_sysret64)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
movq %rsp, %gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack),%rsp
pushq $__USER_DS
pushq %gs:pda_oldrsp
pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER_CS
pushq %rcx
......@@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
ENTRY(xen_sysret32)
/* We're already on the usermode stack at this point, but still
with the kernel gs, so we can easily switch back */
movq %rsp, %gs:pda_oldrsp
movq %gs:pda_kernelstack, %rsp
movq %rsp, PER_CPU_VAR(old_rsp)
movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER32_DS
pushq %gs:pda_oldrsp
pushq PER_CPU_VAR(old_rsp)
pushq %r11
pushq $__USER32_CS
pushq %rcx
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册