提交 ae2b56b9 编写于 作者: I Ingo Molnar

Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

......@@ -138,11 +138,4 @@ struct genapic {
extern struct genapic *genapic;
extern void es7000_update_genapic_to_cluster(void);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#define get_uv_system_type() UV_NONE
#define is_uv_system() 0
#define uv_wakeup_secondary(a, b) 1
#define uv_system_init() do {} while (0)
#endif /* _ASM_X86_GENAPIC_32_H */
......@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
extern int acpi_madt_oem_check(char *, char *);
extern void apic_send_IPI_self(int vector);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);
extern struct genapic apic_x2apic_uv_x;
DECLARE_PER_CPU(int, x2apic_extra_bits);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
extern void setup_apic_routing(void);
......
#ifdef CONFIG_X86_32
# include "irq_regs_32.h"
#else
# include "irq_regs_64.h"
#endif
/*
* Per-cpu current frame pointer - the location of the last exception frame on
* the stack, stored in the per-cpu area.
*
* Jeremy Fitzhardinge <jeremy@goop.org>
*/
#ifndef _ASM_X86_IRQ_REGS_H
#define _ASM_X86_IRQ_REGS_H
#include <asm/percpu.h>
#define ARCH_HAS_OWN_IRQ_REGS
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
return percpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
struct pt_regs *old_regs;
old_regs = get_irq_regs();
percpu_write(irq_regs, new_regs);
return old_regs;
}
#endif /* _ASM_X86_IRQ_REGS_32_H */
/*
* Per-cpu current frame pointer - the location of the last exception frame on
* the stack, stored in the per-cpu area.
*
* Jeremy Fitzhardinge <jeremy@goop.org>
*/
#ifndef _ASM_X86_IRQ_REGS_32_H
#define _ASM_X86_IRQ_REGS_32_H
#include <asm/percpu.h>
#define ARCH_HAS_OWN_IRQ_REGS
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
return percpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
struct pt_regs *old_regs;
old_regs = get_irq_regs();
percpu_write(irq_regs, new_regs);
return old_regs;
}
#endif /* _ASM_X86_IRQ_REGS_32_H */
#include <asm-generic/irq_regs.h>
......@@ -49,31 +49,33 @@
* some of the following vectors are 'rare', they are merged
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
* TLB, reschedule and local APIC vectors are performance-critical.
*
* Vectors 0xf0-0xfa are free (reserved for future Linux use).
*/
#ifdef CONFIG_X86_32
# define SPURIOUS_APIC_VECTOR 0xff
# define ERROR_APIC_VECTOR 0xfe
# define INVALIDATE_TLB_VECTOR 0xfd
# define RESCHEDULE_VECTOR 0xfc
# define CALL_FUNCTION_VECTOR 0xfb
# define CALL_FUNCTION_SINGLE_VECTOR 0xfa
# define THERMAL_APIC_VECTOR 0xf0
# define RESCHEDULE_VECTOR 0xfd
# define CALL_FUNCTION_VECTOR 0xfc
# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
# define THERMAL_APIC_VECTOR 0xfa
/* 0xf8 - 0xf9 : free */
# define INVALIDATE_TLB_VECTOR_END 0xf7
# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
# define NUM_INVALIDATE_TLB_VECTORS 8
#else
#define SPURIOUS_APIC_VECTOR 0xff
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
#define THRESHOLD_APIC_VECTOR 0xf9
#define UV_BAU_MESSAGE 0xf8
#define INVALIDATE_TLB_VECTOR_END 0xf7
#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
# define SPURIOUS_APIC_VECTOR 0xff
# define ERROR_APIC_VECTOR 0xfe
# define RESCHEDULE_VECTOR 0xfd
# define CALL_FUNCTION_VECTOR 0xfc
# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
# define THERMAL_APIC_VECTOR 0xfa
# define THRESHOLD_APIC_VECTOR 0xf9
# define UV_BAU_MESSAGE 0xf8
# define INVALIDATE_TLB_VECTOR_END 0xf7
# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
#define NUM_INVALIDATE_TLB_VECTORS 8
......
......@@ -11,10 +11,26 @@
*/
#ifdef CONFIG_X86_SMP
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
smp_invalidate_interrupt)
BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
smp_invalidate_interrupt)
#endif
/*
......
......@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context(struct mm_struct *mm);
#ifdef CONFIG_X86_32
# include "mmu_context_32.h"
#else
# include "mmu_context_64.h"
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
load_cr3(next->pgd);
/*
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define activate_mm(prev, next) \
do { \
......@@ -33,5 +76,17 @@ do { \
switch_mm((prev), (next), NULL); \
} while (0);
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
loadsegment(gs, 0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
do { \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
#endif
#endif /* _ASM_X86_MMU_CONTEXT_H */
#ifndef _ASM_X86_MMU_CONTEXT_32_H
#define _ASM_X86_MMU_CONTEXT_32_H
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next,
struct task_struct *tsk)
{
int cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables */
load_cr3(next->pgd);
/*
* load the LDT, if the LDT is different:
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload %cr3.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define deactivate_mm(tsk, mm) \
asm("movl %0,%%gs": :"r" (0));
#endif /* _ASM_X86_MMU_CONTEXT_32_H */
#ifndef _ASM_X86_MMU_CONTEXT_64_H
#define _ASM_X86_MMU_CONTEXT_64_H
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
unsigned cpu = smp_processor_id();
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
cpu_clear(cpu, prev->cpu_vm_mask);
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
cpu_set(cpu, next->cpu_vm_mask);
load_cr3(next->pgd);
if (unlikely(next->context.ldt != prev->context.ldt))
load_LDT_nolock(&next->context);
}
#ifdef CONFIG_SMP
else {
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
load_LDT_nolock(&next->context);
}
}
#endif
}
#define deactivate_mm(tsk, mm) \
do { \
load_gs_index(0); \
asm volatile("movl %0,%%fs"::"r"(0)); \
} while (0)
#endif /* _ASM_X86_MMU_CONTEXT_64_H */
......@@ -75,7 +75,7 @@ do { \
case 8: \
asm(op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "r" ((T__)val)); \
: "re" ((T__)val)); \
break; \
default: __bad_percpu_size(); \
} \
......
......@@ -89,13 +89,15 @@ do { \
#ifdef CONFIG_CC_STACKPROTECTOR
#define __switch_canary \
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,%%gs:%P[gs_canary]\n\t"
#define __switch_canary_param \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
#define __switch_canary_oparam \
, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
#define __switch_canary
#define __switch_canary_param
#define __switch_canary_oparam
#define __switch_canary_iparam
#endif /* CC_STACKPROTECTOR */
/* Save restore flags to clear handle leaking NT */
......@@ -114,13 +116,14 @@ do { \
"jc ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
__switch_canary_oparam \
: [next] "S" (next), [prev] "D" (prev), \
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[tif_fork] "i" (TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
[current_task] "m" (per_cpu_var(current_task)) \
__switch_canary_param \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
......
#ifndef _ASM_X86_UV_UV_H
#define _ASM_X86_UV_UV_H
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#ifdef CONFIG_X86_64
extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);
extern void uv_cpu_init(void);
extern void uv_system_init(void);
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va,
unsigned int cpu);
#else /* X86_64 */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline int is_uv_system(void) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
{ return 1; }
static inline const struct cpumask *
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{ return cpumask; }
#endif /* X86_64 */
#endif /* _ASM_X86_UV_UV_H */
......@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
#define cpubit_isset(cpu, bau_local_cpumask) \
test_bit((cpu), (bau_local_cpumask).bits)
extern int uv_flush_tlb_others(struct cpumask *,
struct mm_struct *, unsigned long);
extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void);
......
......@@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o
obj-$(CONFIG_X86_32_SMP) += smpcommon.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
......
......@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <mach_apic.h>
#include <asm/genapic.h>
#include <asm/uv/uv.h>
#endif
#include <asm/pgtable.h>
......@@ -63,23 +64,23 @@ cpumask_t cpu_sibling_setup_map;
static struct cpu_dev *this_cpu __cpuinitdata;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
/* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
*/
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
/*
* We need valid kernel segments for data and code in long mode too
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
*
* The TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
} };
#else
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
......@@ -111,9 +112,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
} };
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
#ifdef CONFIG_X86_32
......
......@@ -672,7 +672,7 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
pushl $~(nr); \
......@@ -680,11 +680,13 @@ ENTRY(name) \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
call smp_##name; \
call fn; \
jmp ret_from_intr; \
CFI_ENDPROC; \
ENDPROC(name)
#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
......
......@@ -25,6 +25,7 @@
#include <asm/ipi.h>
#include <asm/genapic.h>
#include <asm/pgtable.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/bios.h>
......
......@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
movl %eax,%fs # gets reset once there's real percpu
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
xorl %eax,%eax # Clear GS and LDT
movl %eax,%gs
lldt %ax
......@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
movb $1, ready
cmpb $0,%cl # the first CPU calls start_kernel
je 1f
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
movl (stack_start), %esp
1:
#endif /* CONFIG_SMP */
......
......@@ -22,6 +22,9 @@
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
/*
* Probabilistic stack overflow check:
*
......
......@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
*/
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
/* IPIs for invalidation */
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
......
......@@ -62,6 +62,7 @@
#include <asm/vmi.h>
#include <asm/genapic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
......
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
#include <asm/idle.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
......@@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
* Interrupts are disabled.
*/
asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
/*
* FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
* but still used for documentation purpose but the usage is slightly
* inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
* entry calls in with the first parameter in %eax. Maybe define
* intrlinkage?
*/
#ifdef CONFIG_X86_64
asmlinkage
#endif
void smp_invalidate_interrupt(struct pt_regs *regs)
{
int cpu;
int sender;
unsigned int cpu;
unsigned int sender;
union smp_flush_state *f;
cpu = smp_processor_id();
......@@ -156,14 +159,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
}
out:
ack_APIC_irq();
smp_mb__before_clear_bit();
cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
smp_mb__after_clear_bit();
inc_irq_stat(irq_tlb_count);
}
static void flush_tlb_others_ipi(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va)
{
int sender;
unsigned int sender;
union smp_flush_state *f;
/* Caller has disabled preemption */
......@@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va)
{
if (is_uv_system()) {
/* FIXME: could be an percpu_alloc'd thing */
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
cpumask_andnot(after_uv_flush, cpumask,
cpumask_of(smp_processor_id()));
if (!uv_flush_tlb_others(after_uv_flush, mm, va))
flush_tlb_others_ipi(after_uv_flush, mm, va);
unsigned int cpu;
put_cpu_var(flush_tlb_uv_cpumask);
cpu = get_cpu();
cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
if (cpumask)
flush_tlb_others_ipi(cpumask, mm, va);
put_cpu();
return;
}
flush_tlb_others_ipi(cpumask, mm, va);
......
#include <linux/spinlock.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <asm/tlbflush.h>
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
/* must come after the send_IPI functions above for inlining */
#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (Its not allowed anyway).
*
* Optimizations Manfred Spraul <manfred@colorfullife.com>
*/
static cpumask_var_t flush_cpumask;
static struct mm_struct *flush_mm;
static unsigned long flush_va;
static DEFINE_SPINLOCK(tlbstate_lock);
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
*
* We need to reload %cr3 since the page tables may be going
* away from under us..
*/
void leave_mm(int cpu)
{
BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
/*
*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
* 1) switch_mm() either 1a) or 1b)
* 1a) thread switch to a different mm
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1a2) set cpu_tlbstate to TLBSTATE_OK
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
* was in lazy tlb mode.
* 1a3) update cpu_tlbstate[].active_mm
* Now cpu0 accepts tlb flushes for the new mm.
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
* Now the other cpus will send tlb flush ipis.
* 1a4) change cr3.
* 1b) thread switch without mm change
* cpu_tlbstate[].active_mm is correct, cpu0 already handles
* flush ipis.
* 1b1) set cpu_tlbstate to TLBSTATE_OK
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
* Atomically set the bit [other cpus will start sending flush ipis],
* and test the bit.
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
* 2) switch %%esp, ie current
*
* The interrupt must handle 2 special cases:
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
* runs in kernel space, the cpu could load tlb entries for user space
* pages.
*
* The good news is that cpu_tlbstate is local to each cpu, no
* write/read ordering problems.
*/
/*
* TLB flush IPI:
*
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
* 2) Leave the mm if we are in the lazy tlb mode.
*/
void smp_invalidate_interrupt(struct pt_regs *regs)
{
unsigned long cpu;
cpu = get_cpu();
if (!cpumask_test_cpu(cpu, flush_cpumask))
goto out;
/*
* This was a BUG() but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs
* its staying as a return
*
* BUG();
*/
if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
__flush_tlb_one(flush_va);
} else
leave_mm(cpu);
}
ack_APIC_irq();
smp_mb__before_clear_bit();
cpumask_clear_cpu(cpu, flush_cpumask);
smp_mb__after_clear_bit();
out:
put_cpu_no_resched();
inc_irq_stat(irq_tlb_count);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long va)
{
/*
* - mask must exist :)
*/
BUG_ON(cpumask_empty(cpumask));
BUG_ON(!mm);
/*
* i'm not happy about this global shared spinlock in the
* MM hot path, but we'll see how contended it is.
* AK: x86-64 has a faster method that could be ported.
*/
spin_lock(&tlbstate_lock);
cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
#ifdef CONFIG_HOTPLUG_CPU
/* If a CPU which we ran on has gone down, OK. */
cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
if (unlikely(cpumask_empty(flush_cpumask))) {
spin_unlock(&tlbstate_lock);
return;
}
#endif
flush_mm = mm;
flush_va = va;
/*
* Make the above memory operations globally visible before
* sending the IPI.
*/
smp_mb();
/*
* We have to send the IPI only to
* CPUs affected.
*/
send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
while (!cpumask_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
cpu_relax();
flush_mm = NULL;
flush_va = 0;
spin_unlock(&tlbstate_lock);
}
void flush_tlb_current_task(void)
{
struct mm_struct *mm = current->mm;
preempt_disable();
local_flush_tlb();
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
void flush_tlb_mm(struct mm_struct *mm)
{
preempt_disable();
if (current->active_mm == mm) {
if (current->mm)
local_flush_tlb();
else
leave_mm(smp_processor_id());
}
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
preempt_enable();
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
preempt_disable();
if (current->active_mm == mm) {
if (current->mm)
__flush_tlb_one(va);
else
leave_mm(smp_processor_id());
}
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(&mm->cpu_vm_mask, mm, va);
preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_page);
static void do_flush_tlb_all(void *info)
{
unsigned long cpu = smp_processor_id();
__flush_tlb_all();
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(cpu);
}
void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
static int init_flush_cpumask(void)
{
alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
return 0;
}
early_initcall(init_flush_cpumask);
......@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
......@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
*
* Send a broadcast and wait for a broadcast message to complete.
*
* The cpumaskp mask contains the cpus the broadcast was sent to.
* The flush_mask contains the cpus the broadcast was sent to.
*
* Returns 1 if all remote flushing was done. The mask is zeroed.
* Returns 0 if some remote flushing remains to be done. The mask will have
* some bits still set.
* Returns NULL if all remote flushing was done. The mask is zeroed.
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
struct cpumask *cpumaskp)
const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
......@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
* Success, so clear the remote cpu's from the mask so we don't
* use the IPI method of shootdown on them.
*/
for_each_cpu(bit, cpumaskp) {
for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
if (blade == this_blade)
continue;
cpumask_clear_cpu(bit, cpumaskp);
cpumask_clear_cpu(bit, flush_mask);
}
if (!cpumask_empty(cpumaskp))
return 0;
return 1;
if (!cpumask_empty(flush_mask))
return flush_mask;
return NULL;
}
/**
* uv_flush_tlb_others - globally purge translation cache of a virtual
* address or all TLB's
* @cpumaskp: mask of all cpu's in which the address is to be removed
* @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
* @cpu: the current cpu
*
* This is the entry point for initiating any UV global TLB shootdown.
*
* Purges the translation caches of all specified processors of the given
* virtual address, or purges all TLB's on specified processors.
*
* The caller has derived the cpumaskp from the mm_struct and has subtracted
* the local cpu from the mask. This function is called only if there
* are bits set in the mask. (e.g. flush_tlb_page())
* The caller has derived the cpumask from the mm_struct. This function
* is called only if there are bits set in the mask. (e.g. flush_tlb_page())
*
* The cpumaskp is converted into a nodemask of the nodes containing
* The cpumask is converted into a nodemask of the nodes containing
* the cpus.
*
* Returns 1 if all remote flushing was done.
* Returns 0 if some remote flushing remains to be done.
* Note that this function should be called with preemption disabled.
*
* Returns NULL if all remote flushing was done.
* Returns pointer to cpumask if some remote flushing remains to be
* done. The returned pointer is valid till preemption is re-enabled.
*/
int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
unsigned long va)
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
int i;
int bit;
int blade;
int cpu;
int uv_cpu;
int this_blade;
int locals = 0;
struct bau_desc *bau_desc;
cpu = uv_blade_processor_id();
WARN_ON(!in_atomic());
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
this_blade = uv_numa_blade_id();
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
i = 0;
for_each_cpu(bit, cpumaskp) {
for_each_cpu(bit, flush_mask) {
blade = uv_cpu_to_blade_id(bit);
BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
if (blade == this_blade) {
......@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
* no off_node flushing; return status for local node
*/
if (locals)
return 0;
return flush_mask;
else
return 1;
return NULL;
}
__get_cpu_var(ptcstats).requestor++;
__get_cpu_var(ptcstats).ntargeted += i;
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = smp_processor_id();
bau_desc->payload.sending_cpu = cpu;
return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
}
/*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册