diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index c86a947f5368633b86d1a8e5dc7ddd1803982c64..a3d0211970e95e5152edbaeeecd8ab3a42041ac6 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -112,6 +112,8 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; } +#define acpi_unlazy_tlb(x) + #ifdef CONFIG_ACPI_NUMA extern cpumask_t early_cpu_possible_map; #define for_each_possible_early_cpu(cpu) \ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 72d867f6b518e4db5a79a10c924f858a3edb0af8..8d0ec9df1cbeb53894f47454cd1391b297e6086c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -150,6 +150,8 @@ static inline void disable_acpi(void) { } extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ +#define acpi_unlazy_tlb(x) leave_mm(x) + #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0f3d0cea4d00ca253ee5b46ed78158d460797a56..3118392cdf756bfc913d7a4137d5f7e0d46b046d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -85,6 +85,7 @@ void leave_mm(int cpu) switch_mm(NULL, &init_mm, NULL); } +EXPORT_SYMBOL_GPL(leave_mm); void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -195,12 +196,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); write_cr3(build_cr3(next, new_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, - TLB_FLUSH_ALL); + + /* + * NB: This gets called via leave_mm() in the idle path + * where RCU functions differently. Tracing normally + * uses RCU, so we need to use the _rcuidle variant. + * + * (There is no good reason for this. The idle code should + * be rearranged to call this before rcu_idle_enter().) + */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ write_cr3(build_cr3_noflush(next, new_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); + + /* See above wrt _rcuidle. */ + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } this_cpu_write(cpu_tlbstate.loaded_mm, next); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 2736e25e9dc6458d736383ed0bc60761e7eb4a5f..d50a7b6ccddd99785dc7c6b1e0f368144c597d0e 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -710,6 +710,8 @@ static DEFINE_RAW_SPINLOCK(c3_lock); static void acpi_idle_enter_bm(struct acpi_processor *pr, struct acpi_processor_cx *cx, bool timer_bc) { + acpi_unlazy_tlb(smp_processor_id()); + /* * Must be done before busmaster disable as we might need to * access HPET ! diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5dc7ea4b6bc4cc207cffd8d8efbaed776179c9d9..f0b06b14e782b5b926b5ba7876d827551ce4cfa9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -913,15 +913,16 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state = &drv->states[index]; unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; + int cpu = smp_processor_id(); cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; /* - * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition - * will probably flush the TLB. It's not guaranteed to flush - * the TLB, though, so it's not clear that we can do anything - * useful with this knowledge. + * leave_mm() to avoid costly and often unnecessary wakeups + * for flushing the user TLB's associated with the active mm. */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) + leave_mm(cpu); if (!(lapic_timer_reliable_states & (1 << (cstate)))) tick_broadcast_enter();