diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 35a5d5dca2fae5fb68d522658824440f3b736d8b..c36140d788fe215aadb3a8f27a8de040f2c44c06 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,10 +556,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index adc0ebd8bed0e17be1716f3fb7c3eab51b7fa0c4..43955ee6715b1876b89ebd615b8eb171bbde1dd0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -105,8 +105,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b11b38c3b0bde194b9139ebfa0d3bc251b96f803..58094a1f9e9d301e11d2c93a1ecc126e1715002e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,7 +88,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1077,8 +1076,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 0a3bc19de0177e93f81ae24c58264e7205406fd6..a70fd61095f8a73baa5eb7c486afd6ff19cd4fd1 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -185,7 +185,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe0a615a051b19a99f9388a7b245b326cee86e11..063197771b8d7ba08f2eafe474cacb0efe9e79d3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -162,10 +162,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 8457b49786686f74a737429ec47d04c2f92bab35..d77d07ab310b4317d33e44de10896297bb2a6654 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -16,8 +16,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46bcda4cb1c2f84762bc409bf4c89b1528ba1c52..4f7a9833d8e51f2e023c3a5c0f6b54813c70c4a0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -327,9 +327,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1112,8 +1119,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1135,18 +1144,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 956383844116ab456f8552abd4b5dcc00e09f347..3b31fc05a0f1e45be5985b860a5fde95ee969832 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3287,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3e88b35ac1571cd2dc1719378902ca75c08cfbb9..dea138964b9107b3e22542a8b80f5cf1d43c1dee 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5799,7 +5799,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f84fecd3e15599cba0e5935c1148e..1b3c8189b28656d2644a714ff60ceab7d015d97b 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are