提交 5358e6e7 编写于 作者: L Linus Torvalds

Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

Pull turbostat updates from Len Brown:
 "User-space turbostat (and x86_energy_perf_policy) patches.

  They are primarily bug fixes from users"

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
  tools/power turbostat: update version number
  tools/power turbostat: Add support for Hygon Fam 18h (Dhyana) RAPL
  tools/power turbostat: Fix caller parameter of get_tdp_amd()
  tools/power turbostat: Fix CPU%C1 display value
  tools/power turbostat: do not enforce 1ms
  tools/power turbostat: read from pipes too
  tools/power turbostat: Add Ice Lake NNPI support
  tools/power turbostat: rename has_hsw_msrs()
  tools/power turbostat: Fix Haswell Core systems
  tools/power turbostat: add Jacobsville support
  tools/power turbostat: fix buffer overrun
  tools/power turbostat: fix file descriptor leaks
  tools/power turbostat: fix leak of file descriptor on error return path
  tools/power turbostat: Make interval calculation per thread to reduce jitter
  tools/power turbostat: remove duplicate pc10 column
  tools/power x86_energy_perf_policy: Fix argument parsing
  tools/power: Fix typo in man page
  tools/power/x86: Enable compiler optimisations and Fortify by default
  tools/power x86_energy_perf_policy: Fix "uninitialized variable" warnings at -O2
......@@ -9,9 +9,10 @@ ifeq ("$(origin O)", "command line")
endif
turbostat : turbostat.c
override CFLAGS += -Wall -I../../../include
override CFLAGS += -O2 -Wall -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
......
......@@ -39,7 +39,6 @@ FILE *outf;
int *fd_percpu;
struct timeval interval_tv = {5, 0};
struct timespec interval_ts = {5, 0};
struct timespec one_msec = {0, 1000000};
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
......@@ -60,6 +59,7 @@ unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
......@@ -100,6 +100,7 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
......@@ -166,6 +167,7 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
struct timeval tv_delta;
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
......@@ -506,6 +508,7 @@ unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAU
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
......@@ -849,7 +852,6 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
......@@ -911,7 +913,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (DO_BIC(BIC_TOD))
outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
tsc = t->tsc * tsc_tweak;
......@@ -1287,6 +1289,14 @@ delta_core(struct core_data *new, struct core_data *old)
}
}
int soft_c1_residency_display(int bic)
{
if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
return 0;
return DO_BIC_READ(bic);
}
/*
* old = new - old
*/
......@@ -1309,6 +1319,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
* over-write old w/ new so we can print end of interval values
*/
timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
old->tv_begin = new->tv_begin;
old->tv_end = new->tv_end;
......@@ -1322,7 +1333,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->c1 = new->c1 - old->c1;
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
......@@ -1404,6 +1416,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->tv_begin.tv_usec = 0;
t->tv_end.tv_sec = 0;
t->tv_end.tv_usec = 0;
t->tv_delta.tv_sec = 0;
t->tv_delta.tv_usec = 0;
t->tsc = 0;
t->aperf = 0;
......@@ -1573,6 +1587,9 @@ void compute_average(struct thread_data *t, struct core_data *c,
for_all_cpus(sum_counters, t, c, p);
/* Use the global time delta for the average. */
average.threads.tv_delta = tv_delta;
average.threads.tsc /= topo.num_cpus;
average.threads.aperf /= topo.num_cpus;
average.threads.mperf /= topo.num_cpus;
......@@ -1714,7 +1731,7 @@ void get_apic_id(struct thread_data *t)
if (!DO_BIC(BIC_X2APIC))
return;
if (authentic_amd) {
if (authentic_amd || hygon_genuine) {
unsigned int topology_extensions;
if (max_extended_level < 0x8000001e)
......@@ -1762,19 +1779,20 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
struct msr_counter *mp;
int i;
gettimeofday(&t->tv_begin, (struct timezone *)NULL);
if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
gettimeofday(&t->tv_begin, (struct timezone *)NULL);
if (first_counter_read)
get_apic_id(t);
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
soft_c1_residency_display(BIC_Avg_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
......@@ -1851,20 +1869,20 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
if (DO_BIC(BIC_CPU_c3)) {
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
} else if (do_knl_cstates) {
} else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
if (DO_BIC(BIC_CPU_c7))
if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
return -8;
......@@ -2912,6 +2930,7 @@ int snapshot_cpu_lpi_us(void)
if (retval != 1) {
fprintf(stderr, "Disabling Low Power Idle CPU output\n");
BIC_NOT_PRESENT(BIC_CPU_LPI);
fclose(fp);
return -1;
}
......@@ -2938,6 +2957,7 @@ int snapshot_sys_lpi_us(void)
if (retval != 1) {
fprintf(stderr, "Disabling Low Power Idle System output\n");
BIC_NOT_PRESENT(BIC_SYS_LPI);
fclose(fp);
return -1;
}
fclose(fp);
......@@ -2985,8 +3005,6 @@ static void signal_handler (int signal)
fprintf(stderr, "SIGUSR1\n");
break;
}
/* make sure this manually-invoked interval is at least 1ms long */
nanosleep(&one_msec, NULL);
}
void setup_signal_handler(void)
......@@ -3005,29 +3023,38 @@ void setup_signal_handler(void)
void do_sleep(void)
{
struct timeval select_timeout;
struct timeval tout;
struct timespec rest;
fd_set readfds;
int retval;
FD_ZERO(&readfds);
FD_SET(0, &readfds);
if (!isatty(fileno(stdin))) {
if (ignore_stdin) {
nanosleep(&interval_ts, NULL);
return;
}
select_timeout = interval_tv;
retval = select(1, &readfds, NULL, NULL, &select_timeout);
tout = interval_tv;
retval = select(1, &readfds, NULL, NULL, &tout);
if (retval == 1) {
switch (getc(stdin)) {
case 'q':
exit_requested = 1;
break;
case EOF:
/*
* 'stdin' is a pipe closed on the other end. There
* won't be any further input.
*/
ignore_stdin = 1;
/* Sleep the rest of the time */
rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
nanosleep(&rest, NULL);
}
/* make sure this manually-invoked interval is at least 1ms long */
nanosleep(&one_msec, NULL);
}
}
......@@ -3209,6 +3236,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
break;
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSX */
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
......@@ -3405,6 +3433,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
case INTEL_FAM6_IVYBRIDGE: /* IVB */
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSX */
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
......@@ -3803,6 +3832,7 @@ double get_tdp_amd(unsigned int family)
{
switch (family) {
case 0x17:
case 0x18:
default:
/* This is the max stock TDP of HEDT/Server Fam17h chips */
return 250.0;
......@@ -3841,6 +3871,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_IVYBRIDGE:
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
......@@ -3982,6 +4013,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
switch (family) {
case 0x17: /* Zen, Zen+ */
case 0x18: /* Hygon Dhyana */
do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
......@@ -4002,7 +4034,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
rapl_power_units = ldexp(1.0, -(msr & 0xf));
tdp = get_tdp_amd(model);
tdp = get_tdp_amd(family);
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
......@@ -4018,7 +4050,7 @@ void rapl_probe(unsigned int family, unsigned int model)
{
if (genuine_intel)
rapl_probe_intel(family, model);
if (authentic_amd)
if (authentic_amd || hygon_genuine)
rapl_probe_amd(family, model);
}
......@@ -4032,6 +4064,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
switch (model) {
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
do_gfx_perf_limit_reasons = 1;
case INTEL_FAM6_HASWELL_X: /* HSX */
......@@ -4251,6 +4284,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSW */
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
......@@ -4267,7 +4301,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
}
/*
* HSW adds support for additional MSRs:
* HSW ULT added support for C8/C9/C10 MSRs:
*
* MSR_PKG_C8_RESIDENCY 0x00000630
* MSR_PKG_C9_RESIDENCY 0x00000631
......@@ -4278,13 +4312,13 @@ int has_snb_msrs(unsigned int family, unsigned int model)
* MSR_PKGC10_IRTL 0x00000635
*
*/
int has_hsw_msrs(unsigned int family, unsigned int model)
int has_c8910_msrs(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
switch (model) {
case INTEL_FAM6_HASWELL_CORE:
case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
......@@ -4568,9 +4602,6 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_XEON_PHI_KNM:
return INTEL_FAM6_XEON_PHI_KNL;
case INTEL_FAM6_HASWELL_ULT:
return INTEL_FAM6_HASWELL_CORE;
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
return INTEL_FAM6_BROADWELL_X;
......@@ -4582,7 +4613,11 @@ unsigned int intel_model_duplicates(unsigned int model)
return INTEL_FAM6_SKYLAKE_MOBILE;
case INTEL_FAM6_ICELAKE_MOBILE:
case INTEL_FAM6_ICELAKE_NNPI:
return INTEL_FAM6_CANNONLAKE_MOBILE;
case INTEL_FAM6_ATOM_TREMONT_X:
return INTEL_FAM6_ATOM_GOLDMONT_X;
}
return model;
}
......@@ -4600,6 +4635,8 @@ void process_cpuid()
genuine_intel = 1;
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
authentic_amd = 1;
else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
hygon_genuine = 1;
if (!quiet)
fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
......@@ -4820,12 +4857,12 @@ void process_cpuid()
BIC_NOT_PRESENT(BIC_CPU_c7);
BIC_NOT_PRESENT(BIC_Pkgpc7);
}
if (has_hsw_msrs(family, model)) {
if (has_c8910_msrs(family, model)) {
BIC_PRESENT(BIC_Pkgpc8);
BIC_PRESENT(BIC_Pkgpc9);
BIC_PRESENT(BIC_Pkgpc10);
}
do_irtl_hsw = has_hsw_msrs(family, model);
do_irtl_hsw = has_c8910_msrs(family, model);
if (has_skl_msrs(family, model)) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
......@@ -5123,7 +5160,7 @@ int initialize_counters(int cpu_id)
void allocate_output_buffer()
{
output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
outp = output_buffer;
if (outp == NULL)
err(-1, "calloc output buffer");
......@@ -5269,7 +5306,7 @@ int get_and_dump_counters(void)
}
void print_version() {
fprintf(outf, "turbostat version 19.03.20"
fprintf(outf, "turbostat version 19.08.31"
" - Len Brown <lenb@kernel.org>\n");
}
......
......@@ -9,8 +9,9 @@ ifeq ("$(origin O)", "command line")
endif
x86_energy_perf_policy : x86_energy_perf_policy.c
override CFLAGS += -Wall -I../../../include
override CFLAGS += -O2 -Wall -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
......
......@@ -40,7 +40,7 @@ in the same processor package.
Hardware P-States (HWP) are effectively an expansion of hardware
P-state control from the opportunistic turbo-mode P-state range
to include the entire range of available P-states.
On Broadwell Xeon, the initial HWP implementation, EBP influenced HWP.
On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP.
That influence was removed in subsequent generations,
where it was moved to the
Energy_Performance_Preference (EPP) field in
......
......@@ -545,7 +545,7 @@ void cmdline(int argc, char **argv)
progname = argv[0];
while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw",
while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw:",
long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
......@@ -1259,6 +1259,15 @@ void probe_dev_msr(void)
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
static void get_cpuid_or_exit(unsigned int leaf,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
if (!__get_cpuid(leaf, eax, ebx, ecx, edx))
errx(1, "Processor not supported\n");
}
/*
* early_cpuid()
* initialize turbo_is_enabled, has_hwp, has_epb
......@@ -1266,15 +1275,10 @@ void probe_dev_msr(void)
*/
void early_cpuid(void)
{
unsigned int eax, ebx, ecx, edx, max_level;
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model;
__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
if (max_level < 6)
errx(1, "Processor not supported\n");
__get_cpuid(1, &fms, &ebx, &ecx, &edx);
get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx);
family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf;
if (family == 6 || family == 0xf)
......@@ -1288,7 +1292,7 @@ void early_cpuid(void)
bdx_highest_ratio = msr & 0xFF;
}
__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx);
turbo_is_enabled = (eax >> 1) & 1;
has_hwp = (eax >> 7) & 1;
has_epb = (ecx >> 3) & 1;
......@@ -1306,7 +1310,7 @@ void parse_cpuid(void)
eax = ebx = ecx = edx = 0;
__get_cpuid(0, &max_level, &ebx, &ecx, &edx);
get_cpuid_or_exit(0, &max_level, &ebx, &ecx, &edx);
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1;
......@@ -1315,7 +1319,7 @@ void parse_cpuid(void)
fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx);
__get_cpuid(1, &fms, &ebx, &ecx, &edx);
get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx);
family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf;
stepping = fms & 0xf;
......@@ -1340,7 +1344,7 @@ void parse_cpuid(void)
errx(1, "CPUID: no MSR");
__get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx);
/* turbo_is_enabled already set */
/* has_hwp already set */
has_hwp_notify = eax & (1 << 8);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册