diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 39b2bd48dfbcf595b10c5ba7fd8afe3922e56391..d048cad9bcad4b8c48c62279d7b73eb0499305d7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -72,6 +72,8 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO]; extern u16 __read_mostly tlb_lld_4k[NR_INFO]; extern u16 __read_mostly tlb_lld_2m[NR_INFO]; extern u16 __read_mostly tlb_lld_4m[NR_INFO]; +extern s8 __read_mostly tlb_flushall_shift; + /* * CPU type and hardware bug flags. Kept separately for each CPU. * Members of this structure are referenced in head.S, so think twice diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2016df0081379d8e38b91cf5cbe21d784a0d0a1..7595552600b85c8918e94a77e3043cc1e17b76e4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -459,16 +459,26 @@ u16 __read_mostly tlb_lld_4k[NR_INFO]; u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; +/* + * tlb_flushall_shift marks the balance point for replacing a cr3 write + * with multiple 'invlpg'. The replacement is done when + * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. + * If tlb_flushall_shift is -1, the replacement is disabled. 
+ */ +s8 __read_mostly tlb_flushall_shift = -1; + void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n", + "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ + "tlb_flushall_shift is %d\n", /* signed value: -1 must print as -1, not 0xffffffff */ tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], - tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES]); + tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], + tlb_flushall_shift); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ed0d512cf51b3c7b08d58b2545fc9350ca3bafad..0a4ce2980a5a33e90bea05599a015a0c1381bf23 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -610,6 +610,39 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc) } } +static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) +{ /* picks tlb_flushall_shift from the CPU family/model in *c */ + if (!cpu_has_invlpg) { /* no invlpg available: disable the replacement */ + tlb_flushall_shift = -1; + return; + } + switch ((c->x86 << 8) + c->x86_model) { /* key = family in the high byte, model in the low byte */ + case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 0x61d: /* six-core 45 nm xeon "Dunnington" */ + tlb_flushall_shift = -1; /* replacement disabled on these models */ + break; + case 0x61a: /* 45 nm nehalem, "Bloomfield" */ + case 0x61e: /* 45 nm nehalem, "Lynnfield" */ + case 0x625: /* 32 nm nehalem, "Clarkdale" */ + case 0x62c: /* 32 nm nehalem, "Gulftown" */ + case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ + case 0x62f: /* 32 nm Xeon E7 */ + tlb_flushall_shift = 6; + break; + case 0x62a: /* SandyBridge */ + case 0x62d: /* SandyBridge, "Romley-EP" */ + tlb_flushall_shift = 5; + break; + case 0x63a: /* Ivybridge */ + tlb_flushall_shift = 1; + break; + default: /* every other model gets the same shift as the Nehalem group above */ + tlb_flushall_shift = 6; + } +} + static void 
__cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) { int i, j, n; @@ -630,6 +663,7 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) for (j = 1 ; j < 16 ; j++) intel_tlb_lookup(desc[j]); } + intel_tlb_flushall_shift_set(c); } static const struct cpu_dev __cpuinitconst intel_cpu_dev = { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 184a02a4d871b1fc7381daed9eb3f899a1135aa8..2939f2f9edbb7208e09265b6b35de8882ff9395d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -316,8 +316,6 @@ void flush_tlb_mm(struct mm_struct *mm) preempt_enable(); } -#define FLUSHALL_BAR 16 - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline unsigned long has_large_page(struct mm_struct *mm, unsigned long start, unsigned long end) @@ -352,7 +350,7 @@ void flush_tlb_range(struct vm_area_struct *vma, { struct mm_struct *mm; - if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) { + if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) { flush_all: flush_tlb_mm(vma->vm_mm); return; @@ -373,7 +371,8 @@ void flush_tlb_range(struct vm_area_struct *vma, act_entries = tlb_entries > mm->total_vm ? 
mm->total_vm : tlb_entries; - if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR) + if ((end - start) >> PAGE_SHIFT > + act_entries >> tlb_flushall_shift) local_flush_tlb(); else { if (has_large_page(mm, start, end)) { diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f96a5b58a975c6ccc1ebadecf746f84dd47a38ea..75e888b3cfd23fe06d90e7ad23b2046c3df30259 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -113,7 +113,8 @@ static inline int tlb_fast_mode(struct mmu_gather *tlb) void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); void tlb_flush_mmu(struct mmu_gather *tlb); -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); +void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, + unsigned long end); int __tlb_remove_page(struct mmu_gather *tlb, struct page *page); /* tlb_remove_page