Commit 16b5b092 authored by: Rafael J. Wysocki

Merge branch 'pm-tools'

* pm-tools:
  cpupower: Add support for new AMD family 0x17
  cpupower: Fix bug where return value was not used
  tools/power turbostat: update version number
  tools/power turbostat: decode MSR_IA32_MISC_ENABLE only on Intel
  tools/power turbostat: stop migrating, unless '-m'
  tools/power turbostat: if  --debug, print sampling overhead
  tools/power turbostat: hide SKL counters, when not requested
  intel_pstate: use updated msr-index.h HWP.EPP values
  tools/power x86_energy_perf_policy: support HWP.EPP
  x86: msr-index.h: fix shifts to ULL results in HWP macros.
  x86: msr-index.h: define HWP.EPP values
  x86: msr-index.h: define EPB mid-points
@@ -249,9 +249,13 @@
 #define HWP_MIN_PERF(x)			(x & 0xff)
 #define HWP_MAX_PERF(x)			((x & 0xff) << 8)
 #define HWP_DESIRED_PERF(x)		((x & 0xff) << 16)
-#define HWP_ENERGY_PERF_PREFERENCE(x)	((x & 0xff) << 24)
-#define HWP_ACTIVITY_WINDOW(x)		((x & 0xff3) << 32)
-#define HWP_PACKAGE_CONTROL(x)		((x & 0x1) << 42)
+#define HWP_ENERGY_PERF_PREFERENCE(x)	(((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE		0x00
+#define HWP_EPP_BALANCE_PERFORMANCE	0x80
+#define HWP_EPP_BALANCE_POWERSAVE	0xC0
+#define HWP_EPP_POWERSAVE		0xFF
+#define HWP_ACTIVITY_WINDOW(x)		((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x)		((unsigned long long)(x & 0x1) << 42)

 /* IA32_HWP_STATUS */
 #define HWP_GUARANTEED_CHANGE(x)	(x & 0x1)
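A minimal standalone sketch of why the added casts matter (the values and file are invented for illustration; this is not kernel code). Without forcing 64-bit arithmetic, the activity-window and package-control macros shift a plain int by 32 or 42 bits, which is undefined behaviour in C and in practice loses the bits:

#include <stdio.h>

#define NEW_HWP_ACTIVITY_WINDOW(x)  ((unsigned long long)(x & 0xff3) << 32)

int main(void)
{
	int window = 0x41;
	unsigned long long msr = 0;

	/* 64-bit result: bits 41:32 of the HWP_REQUEST value are populated */
	msr |= NEW_HWP_ACTIVITY_WINDOW(window);
	printf("0x%016llx\n", msr);	/* prints 0x0000004100000000 */

	/* The pre-fix macro, ((x & 0xff3) << 32), shifts a 32-bit int by 32,
	 * which is undefined behaviour; typically the window bits never
	 * reach the MSR value at all. */
	return 0;
}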
@@ -474,9 +478,11 @@
 #define MSR_MISC_PWR_MGMT		0x000001aa

 #define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
 #define ENERGY_PERF_BIAS_PERFORMANCE		0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE	4
 #define ENERGY_PERF_BIAS_NORMAL			6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE	8
 #define ENERGY_PERF_BIAS_POWERSAVE		15

 #define MSR_IA32_PACKAGE_THERM_STATUS		0x000001b1
......
@@ -653,6 +653,12 @@ static const char * const energy_perf_strings[] = {
 	"power",
 	NULL
 };
+static const unsigned int epp_values[] = {
+	HWP_EPP_PERFORMANCE,
+	HWP_EPP_BALANCE_PERFORMANCE,
+	HWP_EPP_BALANCE_POWERSAVE,
+	HWP_EPP_POWERSAVE
+};

 static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 {
@@ -664,17 +670,14 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
 		return epp;

 	if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
-		/*
-		 * Range:
-		 *	0x00-0x3F	:	Performance
-		 *	0x40-0x7F	:	Balance performance
-		 *	0x80-0xBF	:	Balance power
-		 *	0xC0-0xFF	:	Power
-		 * The EPP is a 8 bit value, but our ranges restrict the
-		 * value which can be set. Here only using top two bits
-		 * effectively.
-		 */
-		index = (epp >> 6) + 1;
+		if (epp == HWP_EPP_PERFORMANCE)
+			return 1;
+		if (epp <= HWP_EPP_BALANCE_PERFORMANCE)
+			return 2;
+		if (epp <= HWP_EPP_BALANCE_POWERSAVE)
+			return 3;
+		else
+			return 4;
 	} else if (static_cpu_has(X86_FEATURE_EPB)) {
 		/*
 		 * Range:
@@ -712,15 +715,8 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,

 		value &= ~GENMASK_ULL(31, 24);

-		/*
-		 * If epp is not default, convert from index into
-		 * energy_perf_strings to epp value, by shifting 6
-		 * bits left to use only top two bits in epp.
-		 * The resultant epp need to shifted by 24 bits to
-		 * epp position in MSR_HWP_REQUEST.
-		 */
 		if (epp == -EINVAL)
-			epp = (pref_index - 1) << 6;
+			epp = epp_values[pref_index - 1];

 		value |= (u64)epp << 24;
 		ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
......
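A quick worked example of the new mapping (the numbers follow from the epp_values[] table and the HWP_EPP_* definitions above; the snippet is illustrative, not part of the patch). Selecting "balance_performance" is pref_index 2, so the driver now writes:

	epp = epp_values[2 - 1];	/* HWP_EPP_BALANCE_PERFORMANCE = 0x80 */
	value |= (u64)epp << 24;	/* EPP lives in bits 31:24 of MSR_HWP_REQUEST */

Reading 0x80 back falls into the "epp <= HWP_EPP_BALANCE_PERFORMANCE" branch and maps to index 2 again, so the round trip is consistent.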
@@ -26,6 +26,15 @@ union msr_pstate {
 		unsigned res3:21;
 		unsigned en:1;
 	} bits;
+	struct {
+		unsigned fid:8;
+		unsigned did:6;
+		unsigned vid:8;
+		unsigned iddval:8;
+		unsigned idddiv:2;
+		unsigned res1:30;
+		unsigned en:1;
+	} fam17h_bits;
 	unsigned long long val;
 };
@@ -35,6 +44,8 @@ static int get_did(int family, union msr_pstate pstate)

 	if (family == 0x12)
 		t = pstate.val & 0xf;
+	else if (family == 0x17)
+		t = pstate.fam17h_bits.did;
 	else
 		t = pstate.bits.did;

@@ -44,16 +55,20 @@ static int get_did(int family, union msr_pstate pstate)
 static int get_cof(int family, union msr_pstate pstate)
 {
 	int t;
-	int fid, did;
+	int fid, did, cof;

 	did = get_did(family, pstate);
-	t = 0x10;
-	fid = pstate.bits.fid;
-	if (family == 0x11)
-		t = 0x8;
-
-	return (100 * (fid + t)) >> did;
+	if (family == 0x17) {
+		fid = pstate.fam17h_bits.fid;
+		cof = 200 * fid / did;
+	} else {
+		t = 0x10;
+		fid = pstate.bits.fid;
+		if (family == 0x11)
+			t = 0x8;
+		cof = (100 * (fid + t)) >> did;
+	}
+	return cof;
 }

 /* Needs:
......
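For family 0x17 the core frequency now comes straight from the FID/DID fields: CoreCOF = 200 MHz * FID / DID. A worked example with invented register values: FID = 0x78 (120) and DID = 0x8 give 200 * 120 / 8 = 3000 MHz, while the same FID with DID = 0xC (12) gives 2000 MHz. Older families keep the (100 * (fid + t)) >> did formula, e.g. fid = 0, t = 0x10 and did = 0 yields 100 * 16 = 1600 MHz.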
@@ -70,6 +70,8 @@ enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
 #define CPUPOWER_CAP_IS_SNB		0x00000020
 #define CPUPOWER_CAP_INTEL_IDA		0x00000040

+#define CPUPOWER_AMD_CPBDIS		0x02000000
+
 #define MAX_HW_PSTATES 10

 struct cpupower_cpu_info {
......
@@ -2,11 +2,14 @@

 #include "helpers/helpers.h"

+#define MSR_AMD_HWCR	0xc0010015
+
 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 			int *states)
 {
 	struct cpupower_cpu_info cpu_info;
 	int ret;
+	unsigned long long val;

 	*support = *active = *states = 0;

@@ -16,10 +19,22 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,

 	if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_CBP) {
 		*support = 1;

-		amd_pci_get_num_boost_states(active, states);
-		if (ret <= 0)
-			return ret;
-		*support = 1;
+		/* AMD Family 0x17 does not utilize PCI D18F4 like prior
+		 * families and has no fixed discrete boost states but
+		 * has Hardware determined variable increments instead.
+		 */
+		if (cpu_info.family == 0x17) {
+			if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
+				if (!(val & CPUPOWER_AMD_CPBDIS))
+					*active = 1;
+			}
+		} else {
+			ret = amd_pci_get_num_boost_states(active, states);
+			if (ret)
+				return ret;
+		}
 	} else if (cpupower_cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
 		*support = *active = 1;

 	return 0;
......
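For context, CPUPOWER_AMD_CPBDIS (0x02000000) is bit 25 of MSR_AMD_HWCR, the Core Performance Boost Disable bit, so boost is reported active when that bit is clear. A minimal user-space sketch of the same check (the /dev/cpu msr path, function name and error handling are illustrative assumptions, not cpupower's read_msr()):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_AMD_HWCR		0xc0010015
#define AMD_CPB_DISABLE_BIT	(1ULL << 25)	/* same bit as CPUPOWER_AMD_CPBDIS 0x02000000 */

/* Returns 1 if boost appears active on the given CPU, 0 if disabled, -1 on error. */
static int boost_active(int cpu)
{
	char path[64];
	uint64_t val;
	int fd;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	if (pread(fd, &val, sizeof(val), MSR_AMD_HWCR) != sizeof(val)) {
		close(fd);
		return -1;
	}
	close(fd);
	return !(val & AMD_CPB_DISABLE_BIT);
}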
@@ -57,7 +57,6 @@ unsigned int list_header_only;
 unsigned int dump_only;
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
-unsigned int do_skl_residency;
 unsigned int do_slm_cstates;
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
@@ -93,6 +92,7 @@ unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
+int do_migrate;
 double discover_bclk(unsigned int family, unsigned int model);
 unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
 				/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
@@ -151,6 +151,8 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_COUNTERS 16

 struct thread_data {
+	struct timeval tv_begin;
+	struct timeval tv_end;
 	unsigned long long tsc;
 	unsigned long long aperf;
 	unsigned long long mperf;
@@ -301,6 +303,9 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg
 int cpu_migrate(int cpu)
 {
+	if (!do_migrate)
+		return 0;
+
 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
@@ -384,8 +389,14 @@ struct msr_counter bic[] = {
 	{ 0x0, "CPU" },
 	{ 0x0, "Mod%c6" },
 	{ 0x0, "sysfs" },
+	{ 0x0, "Totl%C0" },
+	{ 0x0, "Any%C0" },
+	{ 0x0, "GFX%C0" },
+	{ 0x0, "CPUGFX%" },
 };

 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

 #define	BIC_Package	(1ULL << 0)
 #define	BIC_Avg_MHz	(1ULL << 1)
@@ -426,6 +437,10 @@ struct msr_counter bic[] = {
 #define	BIC_CPU		(1ULL << 36)
 #define	BIC_Mod_c6	(1ULL << 37)
 #define	BIC_sysfs	(1ULL << 38)
+#define	BIC_Totl_c0	(1ULL << 39)
+#define	BIC_Any_c0	(1ULL << 40)
+#define	BIC_GFX_c0	(1ULL << 41)
+#define	BIC_CPUGFX	(1ULL << 42)

 unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
 unsigned long long bic_present = BIC_sysfs;
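For orientation: a column is emitted only when its BIC_* bit is set in both masks, and platform probing turns bits on via BIC_PRESENT(). The helpers below are part of the existing turbostat sources and are reproduced here from memory as an assumption, not from this diff:

/* assumed shape of the existing helpers, not part of this patch */
#define DO_BIC(COUNTER_NAME)		(bic_enabled & bic_present & COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT)	(bic_present |= COUNTER_BIT)

So the new SKL package-C0 columns appear only after process_cpuid() has called BIC_PRESENT(BIC_Totl_c0) etc. on a platform that has the MSRs, and only if the user has not hidden them with --hide.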
@@ -521,6 +536,8 @@ void print_header(char *delim)
 	struct msr_counter *mp;
 	int printed = 0;

+	if (debug)
+		outp += sprintf(outp, "usec %s", delim);
+
 	if (DO_BIC(BIC_Package))
 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Core))
@@ -599,12 +616,14 @@ void print_header(char *delim)
 	if (DO_BIC(BIC_GFXMHz))
 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));

-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_Any_c0))
 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_GFX_c0))
 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_CPUGFX))
 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
-	}

 	if (DO_BIC(BIC_Pkgpc2))
 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
@@ -771,6 +790,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
 		return 0;

+	if (debug) {
+		/* on each row, print how many usec each timestamp took to gather */
+		struct timeval tv;
+
+		timersub(&t->tv_end, &t->tv_begin, &tv);
+		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
+	}
+
 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;

 	tsc = t->tsc * tsc_tweak;
@@ -912,12 +939,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);

 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+	if (DO_BIC(BIC_Any_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+	if (DO_BIC(BIC_GFX_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+	if (DO_BIC(BIC_CPUGFX))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
-	}

 	if (DO_BIC(BIC_Pkgpc2))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
@@ -1038,12 +1067,16 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 	int i;
 	struct msr_counter *mp;

-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+	if (DO_BIC(BIC_Any_c0))
 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+	if (DO_BIC(BIC_GFX_c0))
 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+	if (DO_BIC(BIC_CPUGFX))
 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
-	}

 	old->pc2 = new->pc2 - old->pc2;
 	if (DO_BIC(BIC_Pkgpc3))
 		old->pc3 = new->pc3 - old->pc3;
@@ -1292,12 +1325,14 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;

-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+	if (DO_BIC(BIC_Any_c0))
 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+	if (DO_BIC(BIC_GFX_c0))
 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+	if (DO_BIC(BIC_CPUGFX))
 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
-	}

 	average.packages.pc2 += p->pc2;
 	if (DO_BIC(BIC_Pkgpc3))
@@ -1357,12 +1392,14 @@ void compute_average(struct thread_data *t, struct core_data *c,
 	average.cores.c7 /= topo.num_cores;
 	average.cores.mc6_us /= topo.num_cores;

-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0))
 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_Any_c0))
 		average.packages.pkg_any_core_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_GFX_c0))
 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+	if (DO_BIC(BIC_CPUGFX))
 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
-	}

 	average.packages.pc2 /= topo.num_packages;
 	if (DO_BIC(BIC_Pkgpc3))
@@ -1482,6 +1519,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	struct msr_counter *mp;
 	int i;

+	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
 	if (cpu_migrate(cpu)) {
 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
 		return -1;
@@ -1565,7 +1605,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)

 	/* collect core counters only for 1st thread in core */
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-		return 0;
+		goto done;

 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
@@ -1601,15 +1641,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)

 	/* collect package counters only for 1st core in package */
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
-		return 0;
+		goto done;

-	if (do_skl_residency) {
+	if (DO_BIC(BIC_Totl_c0)) {
 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
 			return -10;
+	}
+	if (DO_BIC(BIC_Any_c0)) {
 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
 			return -11;
+	}
+	if (DO_BIC(BIC_GFX_c0)) {
 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
 			return -12;
+	}
+	if (DO_BIC(BIC_CPUGFX)) {
 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
 			return -13;
 	}
@@ -1688,6 +1734,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		if (get_mp(cpu, mp, &p->counter[i]))
 			return -10;
 	}
+done:
+	gettimeofday(&t->tv_end, (struct timezone *)NULL);

 	return 0;
 }
@@ -3895,6 +3943,9 @@ void decode_misc_enable_msr(void)
 {
 	unsigned long long msr;

+	if (!genuine_intel)
+		return;
+
 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
 			base_cpu, msr,
@@ -4198,7 +4249,12 @@ void process_cpuid()
 		BIC_PRESENT(BIC_Pkgpc10);
 	}
 	do_irtl_hsw = has_hsw_msrs(family, model);
-	do_skl_residency = has_skl_msrs(family, model);
+	if (has_skl_msrs(family, model)) {
+		BIC_PRESENT(BIC_Totl_c0);
+		BIC_PRESENT(BIC_Any_c0);
+		BIC_PRESENT(BIC_GFX_c0);
+		BIC_PRESENT(BIC_CPUGFX);
+	}
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates = is_knl(family, model);
@@ -4578,7 +4634,7 @@ int get_and_dump_counters(void)
 }

 void print_version() {
-	fprintf(outf, "turbostat version 17.04.12"
+	fprintf(outf, "turbostat version 17.06.23"
 		" - Len Brown <lenb@kernel.org>\n");
 }
@@ -4951,6 +5007,7 @@ void cmdline(int argc, char **argv)
 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
 		{"Joules",	no_argument,		0, 'J'},
 		{"list",	no_argument,		0, 'l'},
+		{"migrate",	no_argument,		0, 'm'},
 		{"out",		required_argument,	0, 'o'},
 		{"quiet",	no_argument,		0, 'q'},
 		{"show",	required_argument,	0, 's'},
@@ -4962,7 +5019,7 @@ void cmdline(int argc, char **argv)

 	progname = argv[0];

-	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v",
 				long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -5005,6 +5062,9 @@ void cmdline(int argc, char **argv)
 			list_header_only++;
 			quiet++;
 			break;
+		case 'm':
+			do_migrate = 1;
+			break;
 		case 'o':
 			outf = fopen_or_die(optarg, "w");
 			break;
......
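A quick usage sketch of the turbostat changes above (the interval value is arbitrary; --migrate is the option added here, --debug and --interval are pre-existing turbostat options): `turbostat --debug --migrate --interval 5` restores the old behaviour of migrating onto each CPU while its counters are read (now off by default), and --debug prefixes every output row with a "usec" column reporting how long that per-CPU collection took, i.e. t->tv_end minus t->tv_begin.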
-DESTDIR ?=
+CC		= $(CROSS_COMPILE)gcc
+BUILD_OUTPUT	:= $(CURDIR)
+PREFIX		:= /usr
+DESTDIR		:=
+
+ifeq ("$(origin O)", "command line")
+	BUILD_OUTPUT := $(O)
+endif

 x86_energy_perf_policy : x86_energy_perf_policy.c
+CFLAGS +=	-Wall
+CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+
+%: %.c
+	@mkdir -p $(BUILD_OUTPUT)
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@

+.PHONY : clean
 clean :
-	rm -f x86_energy_perf_policy
+	@rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy

-install :
-	install x86_energy_perf_policy ${DESTDIR}/usr/bin/
-	install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
+install : x86_energy_perf_policy
+	install -d  $(DESTDIR)$(PREFIX)/bin
+	install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
+	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
+	install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> .\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com>
.\" Distributed under the GPL, Copyleft 1994. .\" Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8 .TH X86_ENERGY_PERF_POLICY 8
.SH NAME .SH NAME
x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
.SH SYNOPSIS .SH SYNOPSIS
.ft B
.B x86_energy_perf_policy .B x86_energy_perf_policy
.RB [ "\-c cpu" ] .RB "[ options ] [ scope ] [field \ value]"
.RB [ "\-v" ]
.RB "\-r"
.br .br
.B x86_energy_perf_policy .RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list"
.RB [ "\-c cpu" ]
.RB [ "\-v" ]
.RB 'performance'
.br .br
.B x86_energy_perf_policy .RB "cpu-list, pkg-list: # | #,# | #-# | all"
.RB [ "\-c cpu" ]
.RB [ "\-v" ]
.RB 'normal'
.br .br
.B x86_energy_perf_policy .RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired"
.RB [ "\-c cpu" ]
.RB [ "\-v" ]
.RB 'powersave'
.br .br
.B x86_energy_perf_policy .RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value)"
.RB [ "\-c cpu" ]
.RB [ "\-v" ]
.RB n
.br .br
.RB "value: # | default | performance | balance-performance | balance-power | power"
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
displays and updates energy-performance policy settings specific to
Intel Architecture Processors.  Settings are accessed via Model Specific Register (MSR)
updates, no matter if the Linux cpufreq sub-system is enabled or not.
Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
may affect a wide range of hardware decisions,
such as how aggressively the hardware enters and exits CPU idle states (C-states)
and Processor Performance States (P-states).
This policy hint does not replace explicit OS C-state and P-state selection.
Rather, it tells the hardware how aggressively to implement those selections.
Further, it allows the OS to influence energy/performance trade-offs where there
is no software interface, such as in the opportunistic "turbo-mode" P-state range.
Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU,
but some implementations
share a single MSR among all CPUs in each processor package.
On those systems, a write to EPB on one processor will
be visible, and will have an effect, on all CPUs
in the same processor package.
Hardware P-States (HWP) are effectively an expansion of hardware
P-state control from the opportunistic turbo-mode P-state range
to include the entire range of available P-states.
On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP.
That influence was removed in subsequent generations,
where it was moved to the
Energy_Performance_Preference (EPP) field in
a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG.
EPP is the most commonly managed knob in HWP mode,
but MSR_IA32_HWP_REQUEST also allows the user to specify
minimum-frequency for Quality-of-Service,
and maximum-frequency for power-capping.
MSR_IA32_HWP_REQUEST is defined per-CPU.
MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST,
but it can simultaneously set the default policy for all CPUs within a package.
A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is
over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG.
MSR_HWP_CAPABILITIES shows the default values for the fields
in MSR_IA32_HWP_REQUEST.  It is displayed when no values
are being written.
.SS SCOPE OPTIONS
.PP
\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list.
The CPU-list may be comma-separated CPU numbers, with dash for range
or the string "all".  Eg. '--cpu 1,4,6-8' or '--cpu all'.
When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu
MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1),
or exempt from MSR_IA32_HWP_REQUEST_PKG (0).
.PP
\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list.
The list is a string of individual package numbers separated
by commas, and or ranges of package numbers separated by a dash,
or the string "all".
For example '--pkg 1,3' or '--pkg all'
.SS VALUE OPTIONS
.PP
.I normal | default
Set a policy with a normal balance between performance and energy efficiency.
The processor will tolerate minor performance compromise
for potentially significant energy savings.
This is a reasonable default for most desktops and servers.
"default" is a synonym for "normal".
.PP
.I performance
Set a policy for maximum performance,
accepting no performance sacrifice for the benefit of energy efficiency.
.PP
.I balance-performance
Set a policy with a high priority on performance,
but allowing some performance loss to benefit energy efficiency.
.PP
.I balance-power
Set a policy where the performance and power are balanced.
This is the default.
.PP
.I power
Set a policy where the processor can accept
a measurable performance impact to maximize energy efficiency.
.PP
The following table shows the mapping from the value strings above to actual MSR values.
This mapping is defined in the Linux-kernel header, msr-index.h.
.nf
VALUE STRING EPB EPP
performance 0 0
balance-performance 4 128
normal, default 6 128
balance-power 8 192
power 15 255
.fi
.PP
For MSR_IA32_HWP_REQUEST performance fields
(--hwp-min, --hwp-max, --hwp-desired), the value option
is in units of 100 MHz, Eg. 12 signifies 1200 MHz.
.SS FIELD OPTIONS
\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with
the value-string. In addition, enables turbo-mode and HWP-mode, if they were previously disabled.
Thus "--all normal" will set a system without cpufreq into a well known configuration.
.PP
\fB-B, --epb\fP set EPB per-core or per-package.
See value strings in the table above.
.PP
\fB-d, --debug\fP debug increases verbosity. By default
x86_energy_perf_policy is silent for updates,
and verbose for read-only mode.
.PP
\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package.
See value strings in the table above.
.PP
\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio.
The "default" is the value found in IA32_HWP_CAPABILITIES.min.
.PP
\fB-M, --hwp-max\fP request HWP to not exceed the specified core/bus ratio.
The "default" is the value found in IA32_HWP_CAPABILITIES.max.
.PP
\fB-D, --hwp-desired\fP request HWP 'desired' frequency.
The "normal" setting is 0, which
corresponds to 'full autonomous' HWP control.
Non-zero performance values request a specific performance
level on this processor, specified in multiples of 100 MHz.
.PP
\fB-w, --hwp-window\fP specify integer number of microsec
in the sliding window that HWP uses to maintain average frequency.
This parameter is meaningful only when the "desired" field above is non-zero.
Default is 0, allowing the HW to choose.
.SH OTHER OPTIONS
.PP
\fB-f, --force\fP writes the specified values without bounds checking.
.PP
\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu,
indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1)
or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings.
The default is exempt.
.PP
\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode.
.PP
\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode.
.PP
\fB-v, --version\fP print version and exit.
.PP
If no request to change policy is made,
the default behavior is to read
and display the current system state,
including the default capabilities.
.SH WARNING
.PP
This utility writes directly to Model Specific Registers.
There is no locking or coordination should this utility
be used to modify HWP limit fields at the same time that
intel_pstate's sysfs attributes access the same MSRs.
.PP
Note that --hwp-desired and --hwp-window are considered experimental.
Future versions of Linux reserve the right to access these
fields internally -- potentially conflicting with user-space access.
.SH EXAMPLE
.nf
# sudo x86_energy_perf_policy
cpu0: EPB 6
cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35
cpu1: EPB 6
cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35
cpu2: EPB 6
cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35
cpu3: EPB 6
cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35
.fi
.SH NOTES
.B "x86_energy_perf_policy"
runs only as root.
.SH FILES
.ta
.nf
/dev/cpu/*/msr
.fi
.SH "SEE ALSO"
.nf
msr(4)
Intel(R) 64 and IA-32 Architectures Software Developer's Manual
.fi
.PP
.SH AUTHORS
.nf
Len Brown
@@ -3,322 +3,1424 @@
 * policy preference bias on recent X86 processors.
 */
/*
 * Copyright (c) 2010 - 2017 Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is released under GPL v2
 */
#define _GNU_SOURCE
#include MSRHEADER
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sched.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <getopt.h>
#include <err.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <cpuid.h>
#include <errno.h>
#define OPTARG_NORMAL (INT_MAX - 1)
#define OPTARG_POWER (INT_MAX - 2)
#define OPTARG_BALANCE_POWER (INT_MAX - 3)
#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4)
#define OPTARG_PERFORMANCE (INT_MAX - 5)
struct msr_hwp_cap {
unsigned char highest;
unsigned char guaranteed;
unsigned char efficient;
unsigned char lowest;
};
struct msr_hwp_request {
unsigned char hwp_min;
unsigned char hwp_max;
unsigned char hwp_desired;
unsigned char hwp_epp;
unsigned int hwp_window;
unsigned char hwp_use_pkg;
} req_update;
unsigned int debug;
unsigned int verbose;
unsigned int force;
char *progname;
int base_cpu;
unsigned char update_epb;
unsigned long long new_epb;
unsigned char turbo_is_enabled;
unsigned char update_turbo;
unsigned char turbo_update_value;
unsigned char update_hwp_epp;
unsigned char update_hwp_min;
unsigned char update_hwp_max;
unsigned char update_hwp_desired;
unsigned char update_hwp_window;
unsigned char update_hwp_use_pkg;
unsigned char update_hwp_enable;
#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg)
int max_cpu_num;
int max_pkg_num;
#define MAX_PACKAGES 64
unsigned int first_cpu_in_pkg[MAX_PACKAGES];
unsigned long long pkg_present_set;
unsigned long long pkg_selected_set;
cpu_set_t *cpu_present_set;
cpu_set_t *cpu_selected_set;
int genuine_intel;
size_t cpu_setsize;
char *proc_stat = "/proc/stat";
unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int bdx_highest_ratio;
/*
 * maintain compatibility with original implementation, but don't document it:
 */
void usage(void)
{
fprintf(stderr, "%s [options] [scope][field value]\n", progname);
fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n");
fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n");
fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n");
fprintf(stderr,
	"value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n");
fprintf(stderr, "--hwp-window usec\n");
fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n");
fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname);
exit(1);
}
/*
* If bdx_highest_ratio is set,
* then we must translate between MSR format and simple ratio
* used on the cmdline.
*/
int ratio_2_msr_perf(int ratio)
{
int msr_perf;
if (!bdx_highest_ratio)
return ratio;
msr_perf = ratio * 255 / bdx_highest_ratio;
if (debug)
fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio);
return msr_perf;
}
int msr_perf_2_ratio(int msr_perf)
{
int ratio;
double d;
if (!bdx_highest_ratio)
return msr_perf;
d = (double)msr_perf * (double) bdx_highest_ratio / 255.0;
d = d + 0.5; /* round */
ratio = (int)d;
if (debug)
fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d);
return ratio;
}
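/*
 * Worked example (illustrative numbers, not from the patch): on a Broadwell
 * Xeon reporting bdx_highest_ratio = 24, the cmdline ratio 12 is stored in
 * the MSR as 12 * 255 / 24 = 127, and reading 127 back converts to
 * 127 * 24 / 255 = 11.95..., which rounds to ratio 12 again.
 */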
int parse_cmdline_epb(int i)
{
if (!has_epb)
errx(1, "EPB not enabled on this platform");
update_epb = 1;
switch (i) {
case OPTARG_POWER:
return ENERGY_PERF_BIAS_POWERSAVE;
case OPTARG_BALANCE_POWER:
return ENERGY_PERF_BIAS_BALANCE_POWERSAVE;
case OPTARG_NORMAL:
return ENERGY_PERF_BIAS_NORMAL;
case OPTARG_BALANCE_PERFORMANCE:
return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE;
case OPTARG_PERFORMANCE:
return ENERGY_PERF_BIAS_PERFORMANCE;
}
if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE)
errx(1, "--epb must be from 0 to 15");
return i;
}
#define HWP_CAP_LOWEST 0
#define HWP_CAP_HIGHEST 255
/*
* "performance" changes hwp_min to cap.highest
* All others leave it at cap.lowest
*/
int parse_cmdline_hwp_min(int i)
{
update_hwp_min = 1;
switch (i) {
case OPTARG_POWER:
case OPTARG_BALANCE_POWER:
case OPTARG_NORMAL:
case OPTARG_BALANCE_PERFORMANCE:
return HWP_CAP_LOWEST;
case OPTARG_PERFORMANCE:
return HWP_CAP_HIGHEST;
}
return i;
}
/*
* "power" changes hwp_max to cap.lowest
* All others leave it at cap.highest
*/
int parse_cmdline_hwp_max(int i)
{
update_hwp_max = 1;
switch (i) {
case OPTARG_POWER:
return HWP_CAP_LOWEST;
case OPTARG_NORMAL:
case OPTARG_BALANCE_POWER:
case OPTARG_BALANCE_PERFORMANCE:
case OPTARG_PERFORMANCE:
return HWP_CAP_HIGHEST;
}
return i;
}
/*
* for --hwp-des, all strings leave it in autonomous mode
* If you want to change it, you need to explicitly pick a value
*/
int parse_cmdline_hwp_desired(int i)
{
update_hwp_desired = 1;
switch (i) {
case OPTARG_POWER:
case OPTARG_BALANCE_POWER:
case OPTARG_BALANCE_PERFORMANCE:
case OPTARG_NORMAL:
case OPTARG_PERFORMANCE:
return 0; /* autonomous */
}
return i;
}
int parse_cmdline_hwp_window(int i)
{
unsigned int exponent;
update_hwp_window = 1;
switch (i) {
case OPTARG_POWER:
case OPTARG_BALANCE_POWER:
case OPTARG_NORMAL:
case OPTARG_BALANCE_PERFORMANCE:
case OPTARG_PERFORMANCE:
return 0;
}
if (i < 0 || i > 1270000000) {
fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n");
usage();
}
for (exponent = 0; ; ++exponent) {
if (debug)
printf("%d 10^%d\n", i, exponent);
if (i <= 127)
break;
i = i / 10;
}
if (debug)
fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i);
return (exponent << 7) | i;
}
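/*
 * Worked example of the window encoding above (value invented for
 * illustration): --hwp-window 5000 divides by 10 until the mantissa fits in
 * 7 bits, ending with i = 50 and exponent = 2 (50 * 10^2 usec), so the
 * function returns (2 << 7) | 50 = 0x132 for the 10-bit activity-window field.
 */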
int parse_cmdline_hwp_epp(int i)
{
update_hwp_epp = 1;
switch (i) {
case OPTARG_POWER:
return HWP_EPP_POWERSAVE;
case OPTARG_BALANCE_POWER:
return HWP_EPP_BALANCE_POWERSAVE;
case OPTARG_NORMAL:
case OPTARG_BALANCE_PERFORMANCE:
return HWP_EPP_BALANCE_PERFORMANCE;
case OPTARG_PERFORMANCE:
return HWP_EPP_PERFORMANCE;
}
if (i < 0 || i > 0xff) {
fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n");
usage();
}
return i;
}
int parse_cmdline_turbo(int i)
{
update_turbo = 1;
switch (i) {
case OPTARG_POWER:
return 0;
case OPTARG_NORMAL:
case OPTARG_BALANCE_POWER:
case OPTARG_BALANCE_PERFORMANCE:
case OPTARG_PERFORMANCE:
return 1;
}
if (i < 0 || i > 1) {
fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n");
usage();
}
return i;
}
int parse_optarg_string(char *s)
{
int i;
char *endptr;
if (!strncmp(s, "default", 7))
return OPTARG_NORMAL;
if (!strncmp(s, "normal", 6))
return OPTARG_NORMAL;
if (!strncmp(s, "power", 9))
return OPTARG_POWER;
if (!strncmp(s, "balance-power", 17))
return OPTARG_BALANCE_POWER;
if (!strncmp(s, "balance-performance", 19))
return OPTARG_BALANCE_PERFORMANCE;
if (!strncmp(s, "performance", 11))
return OPTARG_PERFORMANCE;
i = strtol(s, &endptr, 0);
if (s == endptr) {
fprintf(stderr, "no digits in \"%s\"\n", s);
usage();
}
if (i == LONG_MIN || i == LONG_MAX)
errx(-1, "%s", s);
if (i > 0xFF)
errx(-1, "%d (0x%x) must be < 256", i, i);
if (i < 0)
errx(-1, "%d (0x%x) must be >= 0", i, i);
return i;
}
void parse_cmdline_all(char *s)
{
force++;
update_hwp_enable = 1;
req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s));
req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s));
req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s));
if (has_epb)
new_epb = parse_cmdline_epb(parse_optarg_string(s));
turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s));
req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s));
req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s));
}
void validate_cpu_selected_set(void)
{
int cpu;
if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0)
errx(0, "no CPUs requested");
for (cpu = 0; cpu <= max_cpu_num; ++cpu) {
if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set))
if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
errx(1, "Requested cpu% is not present", cpu);
}
}
void parse_cmdline_cpu(char *s)
{
char *startp, *endp;
int cpu = 0;
if (pkg_selected_set) {
usage();
errx(1, "--cpu | --pkg");
}
cpu_selected_set = CPU_ALLOC((max_cpu_num + 1));
if (cpu_selected_set == NULL)
err(1, "cpu_selected_set");
CPU_ZERO_S(cpu_setsize, cpu_selected_set);
for (startp = s; startp && *startp;) {
if (*startp == ',') {
startp++;
continue;
}
if (*startp == '-') {
int end_cpu;
startp++;
end_cpu = strtol(startp, &endp, 10);
if (startp == endp)
continue;
while (cpu <= end_cpu) {
if (cpu > max_cpu_num)
errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
cpu++;
}
startp = endp;
continue;
}
if (strncmp(startp, "all", 3) == 0) {
for (cpu = 0; cpu <= max_cpu_num; cpu += 1) {
if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
}
startp += 3;
if (*startp == 0)
break;
}
/* "--cpu even" is not documented */
if (strncmp(startp, "even", 4) == 0) {
for (cpu = 0; cpu <= max_cpu_num; cpu += 2) {
if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
}
startp += 4;
if (*startp == 0)
break;
}
/* "--cpu odd" is not documented */
if (strncmp(startp, "odd", 3) == 0) {
for (cpu = 1; cpu <= max_cpu_num; cpu += 2) {
if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
}
startp += 3;
if (*startp == 0)
break;
}
cpu = strtol(startp, &endp, 10);
if (startp == endp)
errx(1, "--cpu cpu-set: confused by '%s'", startp);
if (cpu > max_cpu_num)
errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
startp = endp;
}
validate_cpu_selected_set();
}
void parse_cmdline_pkg(char *s)
{
char *startp, *endp;
int pkg = 0;
if (cpu_selected_set) {
usage();
errx(1, "--pkg | --cpu");
}
pkg_selected_set = 0;
for (startp = s; startp && *startp;) {
if (*startp == ',') {
startp++;
continue;
}
if (*startp == '-') {
int end_pkg;
startp++;
end_pkg = strtol(startp, &endp, 10);
if (startp == endp)
continue;
while (pkg <= end_pkg) {
if (pkg > max_pkg_num)
errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num);
pkg_selected_set |= 1 << pkg;
pkg++;
}
startp = endp;
continue;
}
if (strncmp(startp, "all", 3) == 0) {
pkg_selected_set = pkg_present_set;
return;
}
pkg = strtol(startp, &endp, 10);
if (pkg > max_pkg_num)
errx(1, "Requested pkg%d Exceeds max pkg%d", pkg, max_pkg_num);
pkg_selected_set |= 1 << pkg;
startp = endp;
}
}
void for_packages(unsigned long long pkg_set, int (func)(int))
{
int pkg_num;
for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) {
if (pkg_set & (1UL << pkg_num))
func(pkg_num);
}
}
void print_version(void)
{
printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
}
void cmdline(int argc, char **argv)
{
int opt;
int option_index = 0;
static struct option long_options[] = {
{"all", required_argument, 0, 'a'},
{"cpu", required_argument, 0, 'c'},
{"pkg", required_argument, 0, 'p'},
{"debug", no_argument, 0, 'd'},
{"hwp-desired", required_argument, 0, 'D'},
{"epb", required_argument, 0, 'B'},
{"force", no_argument, 0, 'f'},
{"hwp-enable", no_argument, 0, 'e'},
{"help", no_argument, 0, 'h'},
{"hwp-epp", required_argument, 0, 'P'},
{"hwp-min", required_argument, 0, 'm'},
{"hwp-max", required_argument, 0, 'M'},
{"read", no_argument, 0, 'r'},
{"turbo-enable", required_argument, 0, 't'},
{"hwp-use-pkg", required_argument, 0, 'u'},
{"version", no_argument, 0, 'v'},
{"hwp-window", required_argument, 0, 'w'},
{0, 0, 0, 0 }
};
progname = argv[0];
while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw",
long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_cmdline_all(optarg);
break;
case 'B':
new_epb = parse_cmdline_epb(parse_optarg_string(optarg));
break;
case 'c': case 'c':
parse_cmdline_cpu(optarg);
break;
case 'e':
update_hwp_enable = 1;
break;
case 'h':
usage();
break;
case 'd':
debug++;
verbose++;
break;
case 'f':
force++;
break;
case 'D':
req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg));
break;
case 'm':
req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg));
break;
case 'M':
req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg));
break;
case 'p':
parse_cmdline_pkg(optarg);
break;
case 'P':
req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg));
break; break;
case 'r':
/* v1 used -r to specify read-only mode, now the default */
break;
case 't':
turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg));
break;
case 'u':
update_hwp_use_pkg++;
if (atoi(optarg) == 0)
req_update.hwp_use_pkg = 0;
else
req_update.hwp_use_pkg = 1;
break; break;
case 'v':
print_version();
exit(0);
break;
case 'w':
req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg));
break; break;
default:
usage();
}
}
/*
 * v1 allowed "performance"|"normal"|"power" with no policy specifier
 * to update BIAS.  Continue to support that, even though no longer documented.
 */
if (argc == optind + 1)
new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind]));
if (argc > optind + 1) {
fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]);
usage();
}
}
if (!strcmp("performance", argv[optind])) {
new_bias = BIAS_PERFORMANCE; int get_msr(int cpu, int offset, unsigned long long *msr)
} else if (!strcmp("normal", argv[optind])) { {
new_bias = BIAS_BALANCE; int retval;
} else if (!strcmp("powersave", argv[optind])) { char pathname[32];
new_bias = BIAS_POWERSAVE; int fd;
} else {
char *endptr; sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
new_bias = strtoull(argv[optind], &endptr, 0); if (fd < 0)
if (endptr == argv[optind] || err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
new_bias > BIAS_POWERSAVE) {
fprintf(stderr, "invalid value: %s\n", retval = pread(fd, msr, sizeof(*msr), offset);
argv[optind]); if (retval != sizeof(*msr))
usage(); err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
}
} if (debug > 1)
fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
close(fd);
return 0;
}
int put_msr(int cpu, int offset, unsigned long long new_msr)
{
char pathname[32];
int retval;
int fd;
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDWR);
if (fd < 0)
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
retval = pwrite(fd, &new_msr, sizeof(new_msr), offset);
if (retval != sizeof(new_msr))
err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval);
close(fd);
if (debug > 1)
fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr);
return 0;
}
void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
{
if (cpu != -1)
printf("cpu%d: ", cpu);
printf("HWP_CAP: low %d eff %d guar %d high %d\n",
cap->lowest, cap->efficient, cap->guaranteed, cap->highest);
}
void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset)
{
unsigned long long msr;
get_msr(cpu, msr_offset, &msr);
cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr));
cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr));
cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr));
cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr));
}
void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str)
{
if (cpu != -1)
printf("cpu%d: ", cpu);
if (str)
printf("%s", str);
printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n",
h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg);
}
void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
{
printf("pkg%d: ", pkg);
if (str)
printf("%s", str);
printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n",
h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
}
void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr;
get_msr(cpu, msr_offset, &msr);
hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff));
hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff));
hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff));
hwp_req->hwp_epp = (((msr) >> 24) & 0xff);
hwp_req->hwp_window = (((msr) >> 32) & 0x3ff);
hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
}
void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
unsigned long long msr = 0;
if (debug > 1)
printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n",
cpu, hwp_req->hwp_min, hwp_req->hwp_max,
hwp_req->hwp_desired, hwp_req->hwp_epp,
hwp_req->hwp_window, hwp_req->hwp_use_pkg);
msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min));
msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max));
msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired));
msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp);
msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window);
msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg);
put_msr(cpu, msr_offset, msr);
}
int print_cpu_msrs(int cpu)
{
unsigned long long msr;
struct msr_hwp_request req;
struct msr_hwp_cap cap;
if (has_epb) {
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
} }
if (!has_hwp)
return 0;
read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
print_hwp_request(cpu, &req, "");
read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
print_hwp_cap(cpu, &cap, "");
return 0;
}
int print_pkg_msrs(int pkg)
{
struct msr_hwp_request req;
unsigned long long msr;
if (!has_hwp)
return 0;
read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
print_hwp_request_pkg(pkg, &req, "");
if (has_hwp_notify) {
get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr);
fprintf(stderr,
"pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n",
pkg, msr,
((msr) & 0x2) ? "EN" : "Dis",
((msr) & 0x1) ? "EN" : "Dis");
}
get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr);
fprintf(stderr,
"pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n",
pkg, msr,
((msr) & 0x4) ? "" : "No-",
((msr) & 0x1) ? "" : "No-");
return 0;
} }
/*
 * Assumption: All HWP systems have 100 MHz bus clock
 */
int ratio_2_sysfs_khz(int ratio)
{
int bclk_khz = 100 * 1000;	/* 100,000 KHz = 100 MHz */
return ratio * bclk_khz;
}
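/*
 * Illustrative example: with the 100 MHz bus-clock assumption above,
 * ratio_2_sysfs_khz(24) = 24 * 100000 = 2400000 kHz, i.e. 2.4 GHz.
 */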
/*
* If HWP is enabled and cpufreq sysfs attributes are present,
* then update sysfs, so that it will not become
* stale when we write to MSRs.
* (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
* so we don't have to touch that.)
*/
void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio)
{
char pathname[64];
FILE *fp;
int retval;
int khz;
asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq",
"=d" (edx) : "a" (0)); cpu, is_max ? "max" : "min");
if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { fp = fopen(pathname, "w");
if (verbose) if (!fp) {
fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", if (debug)
(char *)&ebx, (char *)&edx, (char *)&ecx); perror(pathname);
exit(1); return;
} }
asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); khz = ratio_2_sysfs_khz(ratio);
family = (fms >> 8) & 0xf; retval = fprintf(fp, "%d", khz);
model = (fms >> 4) & 0xf; if (retval < 0)
stepping = fms & 0xf; if (debug)
if (family == 6 || family == 0xf) perror("fprintf");
model += ((fms >> 16) & 0xf) << 4; if (debug)
printf("echo %d > %s\n", khz, pathname);
if (verbose > 1) fclose(fp);
printf("CPUID %d levels family:model:stepping " }
"0x%x:%x:%x (%d:%d:%d)\n", max_level,
family, model, stepping, family, model, stepping);
if (!(edx & (1 << 5))) { /*
if (verbose) * We update all sysfs before updating any MSRs because of
printf("CPUID: no MSR\n"); * bugs in cpufreq/intel_pstate where the sysfs writes
exit(1); * for a CPU may change the min/max values on other CPUS.
*/
int update_sysfs(int cpu)
{
if (!has_hwp)
return 0;
if (!hwp_update_enabled())
return 0;
if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK))
return 0;
if (update_hwp_min)
update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min);
if (update_hwp_max)
update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
return 0;
}
int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req)
{
/* fail if min > max requested */
if (req->hwp_min > req->hwp_max) {
errx(1, "cpu%d: requested hwp-min %d > hwp_max %d",
cpu, req->hwp_min, req->hwp_max);
}
/* fail if desired > max requested */
if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) {
errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d",
cpu, req->hwp_desired, req->hwp_max);
}
/* fail if desired < min requested */
if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) {
errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d",
cpu, req->hwp_desired, req->hwp_min);
}
return 0;
}
int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap)
{
if (update_hwp_max) {
if (req->hwp_max > cap->highest)
errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?",
cpu, req->hwp_max, cap->highest);
if (req->hwp_max < cap->lowest)
errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?",
cpu, req->hwp_max, cap->lowest);
}
if (update_hwp_min) {
if (req->hwp_min > cap->highest)
errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?",
cpu, req->hwp_min, cap->highest);
if (req->hwp_min < cap->lowest)
errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?",
cpu, req->hwp_min, cap->lowest);
}
if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max))
errx(1, "cpu%d: requested min %d > requested max %d",
cpu, req->hwp_min, req->hwp_max);
if (update_hwp_desired && req->hwp_desired) {
if (req->hwp_desired > req->hwp_max)
errx(1, "cpu%d: requested desired %d > requested max %d, use --force?",
cpu, req->hwp_desired, req->hwp_max);
if (req->hwp_desired < req->hwp_min)
errx(1, "cpu%d: requested desired %d < requested min %d, use --force?",
cpu, req->hwp_desired, req->hwp_min);
if (req->hwp_desired < cap->lowest)
errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?",
cpu, req->hwp_desired, cap->lowest);
if (req->hwp_desired > cap->highest)
errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?",
cpu, req->hwp_desired, cap->highest);
}
return 0;
}
int update_hwp_request(int cpu)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
int msr_offset = MSR_HWP_REQUEST;
read_hwp_request(cpu, &req, msr_offset);
if (debug)
print_hwp_request(cpu, &req, "old: ");
if (update_hwp_min)
req.hwp_min = req_update.hwp_min;
if (update_hwp_max)
req.hwp_max = req_update.hwp_max;
if (update_hwp_desired)
req.hwp_desired = req_update.hwp_desired;
if (update_hwp_window)
req.hwp_window = req_update.hwp_window;
if (update_hwp_epp)
req.hwp_epp = req_update.hwp_epp;
req.hwp_use_pkg = req_update.hwp_use_pkg;
read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
if (debug)
print_hwp_cap(cpu, &cap, "");
if (!force)
check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
verify_hwp_req_self_consistency(cpu, &req);
sprintf(msr_path, "/dev/cpu/%d/msr", cpu); write_hwp_request(cpu, &req, msr_offset);
fd = open(msr_path, O_RDWR);
if (fd < 0) { if (debug) {
perror(msr_path); read_hwp_request(cpu, &req, msr_offset);
exit(1); print_hwp_request(cpu, &req, "new: ");
} }
return 0;
}
int update_hwp_request_pkg(int pkg)
{
struct msr_hwp_request req;
struct msr_hwp_cap cap;
int cpu = first_cpu_in_pkg[pkg];
int msr_offset = MSR_HWP_REQUEST_PKG;
read_hwp_request(cpu, &req, msr_offset);
if (debug)
print_hwp_request_pkg(pkg, &req, "old: ");
if (update_hwp_min)
req.hwp_min = req_update.hwp_min;
if (update_hwp_max)
req.hwp_max = req_update.hwp_max;
if (update_hwp_desired)
req.hwp_desired = req_update.hwp_desired;
if (update_hwp_window)
req.hwp_window = req_update.hwp_window;
if (update_hwp_epp)
req.hwp_epp = req_update.hwp_epp;
read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
if (debug)
print_hwp_cap(cpu, &cap, "");
if (!force)
check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
verify_hwp_req_self_consistency(cpu, &req);
write_hwp_request(cpu, &req, msr_offset);
if (debug) {
read_hwp_request(cpu, &req, msr_offset);
print_hwp_request_pkg(pkg, &req, "new: ");
}
return 0;
}
int enable_hwp_on_cpu(int cpu)
{
unsigned long long msr;
get_msr(cpu, MSR_PM_ENABLE, &msr);
put_msr(cpu, MSR_PM_ENABLE, 1);
if (verbose)
printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
return 0;
}
int update_cpu_msrs(int cpu)
{
unsigned long long msr;
if (update_epb) {
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
if (verbose)
printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
cpu, (unsigned int) msr, (unsigned int) new_epb);
}
if (update_turbo) {
int turbo_is_present_and_disabled;
get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr);
turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0);
if (turbo_update_value == 1) {
if (turbo_is_present_and_disabled) {
msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
if (verbose)
printf("cpu%d: turbo ENABLE\n", cpu);
}
} else {
/*
* if "turbo_is_enabled" were known to be describe this cpu
* then we could use it here to skip redundant disable requests.
* but cpu may be in a different package, so we always write.
*/
msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
if (verbose)
printf("cpu%d: turbo DISABLE\n", cpu);
}
}
if (!has_hwp)
return 0;
if (!hwp_update_enabled())
return 0;
update_hwp_request(cpu);
return 0;
}
/*
* Open a file, and exit on failure
*/
FILE *fopen_or_die(const char *path, const char *mode)
{
FILE *filep = fopen(path, "r");
if (!filep)
err(1, "%s: open failed", path);
return filep;
}
unsigned int get_pkg_num(int cpu)
{
FILE *fp;
char pathname[128];
unsigned int pkg;
int retval;
sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
fp = fopen_or_die(pathname, "r");
retval = fscanf(fp, "%d\n", &pkg);
if (retval != 1)
errx(1, "%s: failed to parse", pathname);
return pkg;
}
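/*
 * Illustrative example (hypothetical CPU): on a single-package system the
 * sysfs file read above typically contains just "0":
 *   $ cat /sys/devices/system/cpu/cpu3/topology/physical_package_id
 *   0
 * so get_pkg_num(3) would return 0.
 */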
int set_max_cpu_pkg_num(int cpu)
{
unsigned int pkg;
if (max_cpu_num < cpu)
max_cpu_num = cpu;
pkg = get_pkg_num(cpu);
if (pkg >= MAX_PACKAGES)
errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES);
if (pkg > max_pkg_num)
max_pkg_num = pkg;
if ((pkg_present_set & (1ULL << pkg)) == 0) {
pkg_present_set |= (1ULL << pkg);
first_cpu_in_pkg[pkg] = cpu;
}
return 0;
}
int mark_cpu_present(int cpu)
{
CPU_SET_S(cpu, cpu_setsize, cpu_present_set);
return 0;
}
char *proc_stat = "/proc/stat";
/*
* run func(cpu) on every cpu in /proc/stat
* return max_cpu number
*/
int for_all_proc_cpus(int (func)(int))
{
FILE *fp;
int cpu_num;
int retval;
fp = fopen_or_die(proc_stat, "r");
retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
if (retval != 0)
err(1, "%s: failed to parse format", proc_stat);
while (1) {
retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
if (retval != 1)
break;
retval = func(cpu_num);
if (retval) {
fclose(fp);
return retval;
}
}
fclose(fp);
return 0;
}
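/*
 * Illustrative /proc/stat excerpt (hypothetical counts) matched by the
 * fscanf() calls above: the summary "cpu " line is skipped, then each
 * "cpuN" line yields one func(N) call:
 *   cpu  74608 2520 24433 1117073 6176 4054 0 0 0 0
 *   cpu0 17977 551 6290 279339 1407 1058 0 0 0 0
 *   cpu1 18164 680 6057 279289 1569 1006 0 0 0 0
 */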
void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
{
int cpu_num;
for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)
if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
func(cpu_num);
}
void init_data_structures(void)
{
for_all_proc_cpus(set_max_cpu_pkg_num);
cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1));
cpu_present_set = CPU_ALLOC((max_cpu_num + 1));
if (cpu_present_set == NULL)
err(3, "CPU_ALLOC");
CPU_ZERO_S(cpu_setsize, cpu_present_set);
for_all_proc_cpus(mark_cpu_present);
}
/* clear has_hwp if it is not enabled (or being enabled) */
void verify_hwp_is_enabled(void)
{
unsigned long long msr;
if (!has_hwp) /* set in early_cpuid() */
return;
/* MSR_PM_ENABLE bit 0 == 1 if HWP is enabled and MSRs visible */
get_msr(base_cpu, MSR_PM_ENABLE, &msr);
if ((msr & 1) == 0) {
fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
has_hwp = 0;
return;
}
}
int req_update_bounds_check(void)
{
if (!hwp_update_enabled())
return 0;
/* fail if min > max requested */
if ((update_hwp_max && update_hwp_min) &&
(req_update.hwp_min > req_update.hwp_max)) {
printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max);
return -EINVAL;
}
/* fail if desired > max requested */
if (req_update.hwp_desired && update_hwp_max &&
(req_update.hwp_desired > req_update.hwp_max)) {
printf("hwp-desired cannot be greater than hwp_max\n");
return -EINVAL;
}
/* fail if desired < min requested */
if (req_update.hwp_desired && update_hwp_min &&
(req_update.hwp_desired < req_update.hwp_min)) {
printf("hwp-desired cannot be less than hwp_min\n");
return -EINVAL;
}
return 0;
}
void set_base_cpu(void)
{
base_cpu = sched_getcpu();
if (base_cpu < 0)
err(-ENODEV, "No valid cpus found");
}
void probe_dev_msr(void)
{
struct stat sb;
char pathname[32];
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
/*
* early_cpuid()
* initialize turbo_is_enabled, has_hwp, has_epb
* before cmdline is parsed
*/
void early_cpuid(void)
{
unsigned int eax, ebx, ecx, edx, max_level;
unsigned int fms, family, model;
__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
if (max_level < 6)
errx(1, "Processor not supported\n");
__get_cpuid(1, &fms, &ebx, &ecx, &edx);
family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf;
if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4;
if (model == 0x4F) {
unsigned long long msr;
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
bdx_highest_ratio = msr & 0xFF;
}
__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
turbo_is_enabled = (eax >> 1) & 1;
has_hwp = (eax >> 7) & 1;
has_epb = (ecx >> 3) & 1;
}
/*
* parse_cpuid()
* set
* has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb
*/
void parse_cpuid(void)
{
unsigned int eax, ebx, ecx, edx, max_level;
unsigned int fms, family, model, stepping;
eax = ebx = ecx = edx = 0;
__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1;
if (debug)
fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx);
__get_cpuid(1, &fms, &ebx, &ecx, &edx);
family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf;
stepping = fms & 0xf;
if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4;
if (debug) {
fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping);
fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
ecx & (1 << 0) ? "SSE3" : "-",
ecx & (1 << 3) ? "MONITOR" : "-",
ecx & (1 << 7) ? "EIST" : "-",
ecx & (1 << 8) ? "TM2" : "-",
edx & (1 << 4) ? "TSC" : "-",
edx & (1 << 5) ? "MSR" : "-",
edx & (1 << 22) ? "ACPI-TM" : "-",
edx & (1 << 29) ? "TM" : "-");
}
if (!(edx & (1 << 5)))
errx(1, "CPUID: no MSR");
__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
/* turbo_is_enabled already set */
/* has_hwp already set */
has_hwp_notify = eax & (1 << 8);
has_hwp_activity_window = eax & (1 << 9);
has_hwp_epp = eax & (1 << 10);
has_hwp_request_pkg = eax & (1 << 11);
if (!has_hwp_request_pkg && update_hwp_use_pkg)
errx(1, "--hwp-use-pkg is not available on this hardware");
/* has_epb already set */
if (debug)
fprintf(stderr,
"CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
turbo_is_enabled ? "" : "No-",
has_hwp ? "" : "No-",
has_hwp_notify ? "" : "No-",
has_hwp_activity_window ? "" : "No-",
has_hwp_epp ? "" : "No-",
has_hwp_request_pkg ? "" : "No-",
has_epb ? "" : "No-");
return; /* success */
}
int main(int argc, char **argv)
{
set_base_cpu();
probe_dev_msr();
init_data_structures();
early_cpuid(); /* initial cpuid parse before cmdline */
cmdline(argc, argv);
if (debug)
print_version();
parse_cpuid();
/* If CPU-set and PKG-set are not initialized, default to all CPUs */
if ((cpu_selected_set == 0) && (pkg_selected_set == 0))
cpu_selected_set = cpu_present_set;
/*
* If HWP is being enabled, do it now, so that subsequent operations
* that access HWP registers can work.
*/
if (update_hwp_enable)
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu);
/* If HWP present, but disabled, warn and ignore from here forward */
verify_hwp_is_enabled();
if (req_update_bounds_check())
return -EINVAL;
/* display information only, no updates to settings */
if (!update_epb && !update_turbo && !hwp_update_enabled()) {
if (cpu_selected_set)
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs);
if (has_hwp_request_pkg) {
if (pkg_selected_set == 0)
pkg_selected_set = pkg_present_set;
for_packages(pkg_selected_set, print_pkg_msrs);
}
return 0;
}
/* update CPU set */
if (cpu_selected_set) {
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
} else if (pkg_selected_set)
for_packages(pkg_selected_set, update_hwp_request_pkg);
return 0;
}
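/*
 * Illustrative usage sketch, based only on the option names that appear in
 * the messages above (the remaining flags are defined in cmdline(), which is
 * not shown here):
 *   # x86_energy_perf_policy                 # no updates requested: print
 *                                            # per-cpu and per-package MSRs
 *   # x86_energy_perf_policy --hwp-enable    # write MSR_PM_ENABLE on the
 *                                            # selected cpus before anything else
 */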