diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b0c7db85a474bd45f62a52fb05357b33fe4608f..f8a8e46acb78b0cc895d3d4ca715e87ad57f1cea 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -58,10 +58,12 @@ enum { }; #define INTEL_MSR_RANGE (0xffff) +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) struct acpi_cpufreq_data { struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; + unsigned int max_freq; unsigned int resume; unsigned int cpu_feature; }; @@ -258,6 +260,100 @@ static u32 get_cur_val(cpumask_t mask) return cmd.val; } +/* + * Return the measured active (C0) frequency on this CPU since last call + * to this function. + * Input: cpu number + * Return: Average CPU frequency in terms of max frequency (zero on error) + * + * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance + * over a period of time, while CPU is in C0 state. + * IA32_MPERF counts at the rate of max advertised frequency + * IA32_APERF counts at the rate of actual CPU frequency + * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and + * no meaning should be associated with absolute values of these MSRs. + */ +static unsigned int get_measured_perf(unsigned int cpu) +{ + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; + + cpumask_t saved_mask; + unsigned int perf_percent; + unsigned int retval; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (get_cpu() != cpu) { + /* We were not able to run on requested processor */ + put_cpu(); + return 0; + } + + rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0,0); + wrmsr(MSR_IA32_MPERF, 0,0); + +#ifdef __i386__ + /* + * We dont want to do 64 bit divide with 32 bit kernel + * Get an approximate value. Return failure in case we cannot get + * an approximate value. + */ + if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + int shift_count; + u32 h; + + h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + shift_count = fls(h); + + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + int shift_count = 7; + aperf_cur.split.lo >>= shift_count; + mperf_cur.split.lo >>= shift_count; + } + + if (aperf_cur.split.lo && mperf_cur.split.lo) { + perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + } else { + perf_percent = 0; + } + +#else + if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + int shift_count = 7; + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (aperf_cur.whole && mperf_cur.whole) { + perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + } else { + perf_percent = 0; + } + +#endif + + retval = drv_data[cpu]->max_freq * perf_percent / 100; + + put_cpu(); + set_cpus_allowed(current, saved_mask); + + dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); + return retval; +} + static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { struct acpi_cpufreq_data *data = drv_data[cpu]; @@ -497,7 +593,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int valid_states = 0; unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; - unsigned int l, h; unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; struct acpi_processor_performance *perf; @@ -591,6 +686,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + data->max_freq = perf->states[0].core_frequency * 1000; /* table init */ for (i = 0; i < perf->state_count; i++) { if (i > 0 && perf->states[i].core_frequency == @@ -625,6 +721,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) { + acpi_cpufreq_driver.getavg = get_measured_perf; + } + } + dprintk("CPU%u - ACPI performance management activated.\n", cpu); for (i = 0; i < perf->state_count; i++) dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 86e69b7f9122d3639e834307c5a1662e33ee8986..56c433e64d586a03288cfb34cd4c6987e490286e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1274,6 +1274,26 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); +int cpufreq_driver_getavg(struct cpufreq_policy *policy) +{ + int ret = 0; + + policy = cpufreq_cpu_get(policy->cpu); + if (!policy) + return -EINVAL; + + mutex_lock(&policy->lock); + + if (cpu_online(policy->cpu) && cpufreq_driver->getavg) + ret = cpufreq_driver->getavg(policy->cpu); + + mutex_unlock(&policy->lock); + + cpufreq_cpu_put(policy); + return ret; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_getavg); + /* * Locking: Must be called with the lock_cpu_hotplug() lock held * when "event" is CPUFREQ_GOV_LIMITS diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index bf8aa45d4f019cccd866103d278fa9e7bd4f669d..291cfe9400a1f612c3c4c4bf60171cd2931bfd49 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -393,8 +393,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * policy. To be safe, we focus 10 points under the threshold. */ if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next = (policy->cur * load) / + unsigned int freq_next, freq_cur; + + freq_cur = cpufreq_driver_getavg(policy); + if (!freq_cur) + freq_cur = policy->cur; + + freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10); + if (!dbs_tuners_ins.powersave_bias) { __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 62b76cd96957da8ddfca0a0d1a1508b373be8ab5..0aa15fc8d918e94f9f66494ddf1c2e31f3a1dfe7 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -125,6 +125,9 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_IA32_PERF_STATUS 0x198 #define MSR_IA32_PERF_CTL 0x199 +#define MSR_IA32_MPERF 0xE7 +#define MSR_IA32_APERF 0xE8 + #define MSR_IA32_THERM_CONTROL 0x19a #define MSR_IA32_THERM_INTERRUPT 0x19b #define MSR_IA32_THERM_STATUS 0x19c diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 37e194169fac2f2793894331eb9ec34d81401dfa..e615822887373f5c6089b9cd3139161823fb25ab 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -307,6 +307,9 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_IA32_PERF_STATUS 0x198 #define MSR_IA32_PERF_CTL 0x199 +#define MSR_IA32_MPERF 0xE7 +#define MSR_IA32_APERF 0xE8 + #define MSR_IA32_THERM_CONTROL 0x19a #define MSR_IA32_THERM_INTERRUPT 0x19b #define MSR_IA32_THERM_STATUS 0x19c diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4ea39fee99c7938e4bd9dfec951773b025ada1a7..7f008f6bfdc36e2d1b430bf2b13bd876f43de1ff 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -172,6 +172,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); +extern int cpufreq_driver_getavg(struct cpufreq_policy *policy); + int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); @@ -204,6 +206,7 @@ struct cpufreq_driver { unsigned int (*get) (unsigned int cpu); /* optional */ + unsigned int (*getavg) (unsigned int cpu); int (*exit) (struct cpufreq_policy *policy); int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy);