turbostat.c 139.0 KB
Newer Older
L
Len Brown 已提交
1 2 3 4
/*
 * turbostat -- show CPU frequency and C-state residency
 * on modern Intel turbo-capable processors.
 *
5
 * Copyright (c) 2013 Intel Corporation.
L
Len Brown 已提交
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

22
#define _GNU_SOURCE
23
#include MSRHEADER
24
#include INTEL_FAMILY_HEADER
25
#include <stdarg.h>
L
Len Brown 已提交
26
#include <stdio.h>
27
#include <err.h>
L
Len Brown 已提交
28 29 30 31
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
32
#include <sys/select.h>
L
Len Brown 已提交
33 34 35 36 37
#include <sys/resource.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
38
#include <getopt.h>
L
Len Brown 已提交
39 40 41
#include <dirent.h>
#include <string.h>
#include <ctype.h>
42
#include <sched.h>
43
#include <time.h>
44
#include <cpuid.h>
45 46
#include <linux/capability.h>
#include <errno.h>
L
Len Brown 已提交
47 48

/*
 * File-scope configuration and probe state.
 * Everything without an explicit initializer starts out as zero.
 */
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
struct timeval interval_tv = {5, 0};
struct timespec interval_ts = {5, 0};
struct timespec one_msec = {0, 1000000};
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int do_cnl_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned long long  gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
112

L
Len Brown 已提交
113 114 115 116 117 118 119 120 121 122 123 124 125
/* RAPL capability bits; the MSRs each bit covers are listed beside it. */
#define RAPL_PKG		(1 << 0)
					/* 0x610 MSR_PKG_POWER_LIMIT */
					/* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
					/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO	(1 << 2)
					/* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM		(1 << 3)
					/* 0x618 MSR_DRAM_POWER_LIMIT */
					/* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
					/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO	(1 << 5)
					/* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT	(1 << 6)
					/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY	(1 << 7)
					/* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX		(1 << 8)
					/* 0x640 MSR_PP1_POWER_LIMIT */
					/* 0x641 MSR_PP1_ENERGY_STATUS */
					/* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
					/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define	TJMAX_DEFAULT	100

/* NOTE: evaluates each argument twice; do not pass expressions with side effects */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
L
Len Brown 已提交
145

146 147 148 149 150
/*
 * Buffer size used by sscanf() for added column names.
 * Names are usually truncated to 7 characters, but raw 64-bit counters
 * are printed in 18-column fields, so the buffer must hold those too.
 */
#define	NAME_BYTES 20
#define PATH_BYTES 128
152

L
Len Brown 已提交
153 154 155
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
/* dynamically sized CPU sets, each paired with its byte size below */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
159 160
/* capacity of the per-core/per-package and per-thread counter[] arrays */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24

/* Per-thread (logical CPU) counter snapshot. */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long  irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;

/* Per-core counter snapshot. */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	unsigned int core_id;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;

/* Per-package counter snapshot. */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long cpu_lpi;
	unsigned long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;

/* argument triples for functions taking (thread, core, package) base pointers */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Index helpers into the flat thread/core/package arrays, laid out
 * package-major, then core, then thread, using the dimensions in topo.
 * All arguments are parenthesized so that expression arguments
 * (e.g. "i & 1") index as intended.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))

226
/* classification of counters: where they live, what they count, how they print */
enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};

struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
233
	char path[PATH_BYTES];
234 235 236 237
	unsigned int width;
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
238 239 240
	unsigned int flags;
#define	FLAGS_HIDE	(1 << 0)
#define	FLAGS_SHOW	(1 << 1)
241
#define	SYSFS_PERCPU	(1 << 1)
242 243 244
};

/* Heads and lengths of the added-counter lists, one list per scope. */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;	/* thread-scope list */
	struct msr_counter *cp;	/* core-scope list */
	struct msr_counter *pp;	/* package-scope list */
} sys;

253 254 255 256
/* One thread/core/package record each, used for the summary ("average") row. */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;
258 259 260 261 262 263 264 265 266 267 268 269 270


/*
 * System topology dimensions.  num_packages, num_cores_per_pkg and
 * num_threads_per_core bound the for_all_cpus() loops and are the strides
 * used by the GET_THREAD()/GET_CORE() indexing macros.
 */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
} topo;

struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(void);

int cpu_is_not_present(int cpu)
277
{
278
	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
279
}
280
/*
281 282
 * run func(thread, core, package) in topology order
 * skip non-present cpus
283
 */
284 285 286

int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
287
{
288
	int retval, pkg_no, core_no, thread_no;
289

290 291 292 293 294 295 296
	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
			for (thread_no = 0; thread_no <
				topo.num_threads_per_core; ++thread_no) {
				struct thread_data *t;
				struct core_data *c;
				struct pkg_data *p;
297

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);

				if (cpu_is_not_present(t->cpu_id))
					continue;

				c = GET_CORE(core_base, core_no, pkg_no);
				p = GET_PKG(pkg_base, pkg_no);

				retval = func(t, c, p);
				if (retval)
					return retval;
			}
		}
	}
	return 0;
313 314 315 316
}

/*
 * cpu_migrate - set the caller's CPU affinity mask to exactly @cpu
 * Returns 0 on success, -1 if sched_setaffinity() fails.
 */
int cpu_migrate(int cpu)
{
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);

	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
		return -1;

	return 0;
}
324
int get_msr_fd(int cpu)
L
Len Brown 已提交
325 326 327 328
{
	char pathname[32];
	int fd;

329 330 331 332 333
	fd = fd_percpu[cpu];

	if (fd)
		return fd;

L
Len Brown 已提交
334 335
	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
	fd = open(pathname, O_RDONLY);
336
	if (fd < 0)
337
		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
L
Len Brown 已提交
338

339 340 341 342 343 344 345 346 347 348
	fd_percpu[cpu] = fd;

	return fd;
}

/*
 * get_msr - read one 64-bit MSR
 * @cpu:	cpu whose msr device is read
 * @offset:	file offset passed to pread() (the MSR number)
 * @msr:	where the value is stored
 *
 * Returns 0 on success.  A short or failed read terminates the program
 * via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t retval;

	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);

	/* compare as signed so a -1 return is not converted to a huge size_t */
	if (retval != (ssize_t)sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}

356
/*
357 358
 * Each string in this array is compared in --show and --hide cmdline.
 * Thus, strings that are proper sub-sets must follow their more specific peers.
359
 */
360
struct msr_counter bic[] = {
361 362
	{ 0x0, "usec" },
	{ 0x0, "Time_Of_Day_Seconds" },
363 364 365 366 367
	{ 0x0, "Package" },
	{ 0x0, "Avg_MHz" },
	{ 0x0, "Bzy_MHz" },
	{ 0x0, "TSC_MHz" },
	{ 0x0, "IRQ" },
368
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
	{ 0x0, "Busy%" },
	{ 0x0, "CPU%c1" },
	{ 0x0, "CPU%c3" },
	{ 0x0, "CPU%c6" },
	{ 0x0, "CPU%c7" },
	{ 0x0, "ThreadC" },
	{ 0x0, "CoreTmp" },
	{ 0x0, "CoreCnt" },
	{ 0x0, "PkgTmp" },
	{ 0x0, "GFX%rc6" },
	{ 0x0, "GFXMHz" },
	{ 0x0, "Pkg%pc2" },
	{ 0x0, "Pkg%pc3" },
	{ 0x0, "Pkg%pc6" },
	{ 0x0, "Pkg%pc7" },
384 385
	{ 0x0, "Pkg%pc8" },
	{ 0x0, "Pkg%pc9" },
386
	{ 0x0, "Pk%pc10" },
387 388
	{ 0x0, "CPU%LPI" },
	{ 0x0, "SYS%LPI" },
389 390 391 392 393 394 395 396 397 398 399 400 401
	{ 0x0, "PkgWatt" },
	{ 0x0, "CorWatt" },
	{ 0x0, "GFXWatt" },
	{ 0x0, "PkgCnt" },
	{ 0x0, "RAMWatt" },
	{ 0x0, "PKG_%" },
	{ 0x0, "RAM_%" },
	{ 0x0, "Pkg_J" },
	{ 0x0, "Cor_J" },
	{ 0x0, "GFX_J" },
	{ 0x0, "RAM_J" },
	{ 0x0, "Core" },
	{ 0x0, "CPU" },
402
	{ 0x0, "Mod%c6" },
403
	{ 0x0, "sysfs" },
404 405 406 407
	{ 0x0, "Totl%C0" },
	{ 0x0, "Any%C0" },
	{ 0x0, "GFX%C0" },
	{ 0x0, "CPUGFX%" },
408
	{ 0x0, "Node%" },
409 410
};

411 412


413
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in column.  Bit N must correspond to bic[N], because
 * bic_lookup() turns the array index of a matched name into (1ULL << index).
 *
 * "CPU%LPI" and "SYS%LPI" sit at bic[26] and bic[27]; every column from
 * PkgWatt onward therefore starts at bit 28 so that no two columns share
 * a bit.
 */
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Avg_MHz	(1ULL << 3)
#define	BIC_Bzy_MHz	(1ULL << 4)
#define	BIC_TSC_MHz	(1ULL << 5)
#define	BIC_IRQ		(1ULL << 6)
#define	BIC_SMI		(1ULL << 7)
#define	BIC_Busy	(1ULL << 8)
#define	BIC_CPU_c1	(1ULL << 9)
#define	BIC_CPU_c3	(1ULL << 10)
#define	BIC_CPU_c6	(1ULL << 11)
#define	BIC_CPU_c7	(1ULL << 12)
#define	BIC_ThreadC	(1ULL << 13)
#define	BIC_CoreTmp	(1ULL << 14)
#define	BIC_CoreCnt	(1ULL << 15)
#define	BIC_PkgTmp	(1ULL << 16)
#define	BIC_GFX_rc6	(1ULL << 17)
#define	BIC_GFXMHz	(1ULL << 18)
#define	BIC_Pkgpc2	(1ULL << 19)
#define	BIC_Pkgpc3	(1ULL << 20)
#define	BIC_Pkgpc6	(1ULL << 21)
#define	BIC_Pkgpc7	(1ULL << 22)
#define	BIC_Pkgpc8	(1ULL << 23)
#define	BIC_Pkgpc9	(1ULL << 24)
#define	BIC_Pkgpc10	(1ULL << 25)
#define	BIC_CPU_LPI	(1ULL << 26)
#define	BIC_SYS_LPI	(1ULL << 27)
#define	BIC_PkgWatt	(1ULL << 28)
#define	BIC_CorWatt	(1ULL << 29)
#define	BIC_GFXWatt	(1ULL << 30)
#define	BIC_PkgCnt	(1ULL << 31)
#define	BIC_RAMWatt	(1ULL << 32)
#define	BIC_PKG__	(1ULL << 33)
#define	BIC_RAM__	(1ULL << 34)
#define	BIC_Pkg_J	(1ULL << 35)
#define	BIC_Cor_J	(1ULL << 36)
#define	BIC_GFX_J	(1ULL << 37)
#define	BIC_RAM_J	(1ULL << 38)
#define	BIC_Core	(1ULL << 39)
#define	BIC_CPU		(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_sysfs	(1ULL << 42)
#define	BIC_Totl_c0	(1ULL << 43)
#define	BIC_Any_c0	(1ULL << 44)
#define	BIC_GFX_c0	(1ULL << 45)
#define	BIC_CPUGFX	(1ULL << 46)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)

/* bic_enabled: columns the user wants; bic_present: columns that are available */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
466 467

/*
 * Helpers on the bic_enabled / bic_present masks.
 * Arguments are parenthesized so OR-ed masks such as (A | B) behave as a
 * unit -- in particular under the ~ in BIC_NOT_PRESENT().
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
471

472

473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496
/*
 * Names given on the command line that matched no built-in column are
 * remembered here by bic_lookup(); deferred_skip_index counts the slots
 * in use.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;

/*
 * help - print the usage summary to outf
 */
void help(void)
{
	fprintf(outf,
	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
	"\n"
	"Turbostat forks the specified COMMAND and prints statistics\n"
	"when COMMAND completes.\n"
	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
	"to print statistics, until interrupted.\n"
	"--add		add a counter\n"
	"		eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
	"		{core | package | j,k,l..m,n-p }\n"
	"--quiet	skip decoding system configuration header\n"
	"--interval sec.subsec	Override default 5-second measurement interval\n"
	"--help		print this help message\n"
	"--list		list column headers only\n"
	"--num_iterations num   number of the measurement iterations\n"
	"--out file	create or truncate \"file\" for all output\n"
	"--version	print version information\n"
	"\n"
	"For more help, run \"man turbostat\"\n");
}

507 508 509 510 511
/*
 * bic_lookup
 * for all the strings in comma separate name_list,
 * set the approprate bit in return value.
 */
512
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
513 514 515 516 517 518 519 520 521 522 523 524
{
	int i;
	unsigned long long retval = 0;

	while (name_list) {
		char *comma;

		comma = strchr(name_list, ',');

		if (comma)
			*comma = '\0';

525 526 527
		if (!strcmp(name_list, "all"))
			return ~0;

528 529 530 531 532 533 534
		for (i = 0; i < MAX_BIC; ++i) {
			if (!strcmp(name_list, bic[i].name)) {
				retval |= (1ULL << i);
				break;
			}
		}
		if (i == MAX_BIC) {
535 536 537 538 539 540 541 542 543 544 545 546 547
			if (mode == SHOW_LIST) {
				fprintf(stderr, "Invalid counter name: %s\n", name_list);
				exit(-1);
			}
			deferred_skip_names[deferred_skip_index++] = name_list;
			if (debug)
				fprintf(stderr, "deferred \"%s\"\n", name_list);
			if (deferred_skip_index >= MAX_DEFERRED) {
				fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
					MAX_DEFERRED, name_list);
				help();
				exit(1);
			}
548 549 550 551 552 553 554 555 556
		}

		name_list = comma;
		if (name_list)
			name_list++;

	}
	return retval;
}
557

558

559
void print_header(char *delim)
L
Len Brown 已提交
560
{
561
	struct msr_counter *mp;
562
	int printed = 0;
563

564 565 566 567
	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
568
	if (DO_BIC(BIC_Package))
569
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
570
	if (DO_BIC(BIC_Core))
571
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
572
	if (DO_BIC(BIC_CPU))
573
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
574
	if (DO_BIC(BIC_Avg_MHz))
575
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
576
	if (DO_BIC(BIC_Busy))
577
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
578
	if (DO_BIC(BIC_Bzy_MHz))
579
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
580
	if (DO_BIC(BIC_TSC_MHz))
581
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
582

583 584
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
585
			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
586
		else
587
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
588 589
	}

590
	if (DO_BIC(BIC_SMI))
591
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
592

593
	for (mp = sys.tp; mp; mp = mp->next) {
594

595 596
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
597
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
598
			else
599
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
600
		} else {
601
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
602
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
603
			else
604
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
605 606 607
		}
	}

608
	if (DO_BIC(BIC_CPU_c1))
609
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
610
	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
611
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
612
	if (DO_BIC(BIC_CPU_c6))
613
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
614
	if (DO_BIC(BIC_CPU_c7))
615
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
616

617
	if (DO_BIC(BIC_Mod_c6))
618
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
619

620
	if (DO_BIC(BIC_CoreTmp))
621
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
622 623 624 625

	for (mp = sys.cp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
626
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
627
			else
628
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
629
		} else {
630 631 632 633
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
634 635 636
		}
	}

637
	if (DO_BIC(BIC_PkgTmp))
638
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
639

640
	if (DO_BIC(BIC_GFX_rc6))
641
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
L
Len Brown 已提交
642

643
	if (DO_BIC(BIC_GFXMHz))
644
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
L
Len Brown 已提交
645

646
	if (DO_BIC(BIC_Totl_c0))
647
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
648
	if (DO_BIC(BIC_Any_c0))
649
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
650
	if (DO_BIC(BIC_GFX_c0))
651
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
652
	if (DO_BIC(BIC_CPUGFX))
653
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
654

655
	if (DO_BIC(BIC_Pkgpc2))
656
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
657
	if (DO_BIC(BIC_Pkgpc3))
658
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
659
	if (DO_BIC(BIC_Pkgpc6))
660
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
661
	if (DO_BIC(BIC_Pkgpc7))
662
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
663
	if (DO_BIC(BIC_Pkgpc8))
664
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
665
	if (DO_BIC(BIC_Pkgpc9))
666
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
667
	if (DO_BIC(BIC_Pkgpc10))
668
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
669 670 671 672
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
L
Len Brown 已提交
673

674
	if (do_rapl && !rapl_joules) {
675
		if (DO_BIC(BIC_PkgWatt))
676
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
677
		if (DO_BIC(BIC_CorWatt))
678
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
679
		if (DO_BIC(BIC_GFXWatt))
680
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
681
		if (DO_BIC(BIC_RAMWatt))
682
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
683
		if (DO_BIC(BIC_PKG__))
684
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
685
		if (DO_BIC(BIC_RAM__))
686
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
687
	} else if (do_rapl && rapl_joules) {
688
		if (DO_BIC(BIC_Pkg_J))
689
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
690
		if (DO_BIC(BIC_Cor_J))
691
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
692
		if (DO_BIC(BIC_GFX_J))
693
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
694
		if (DO_BIC(BIC_RAM_J))
695
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
696
		if (DO_BIC(BIC_PKG__))
697
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
698
		if (DO_BIC(BIC_RAM__))
699
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
700
	}
701 702 703
	for (mp = sys.pp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
704
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
705
			else
706
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
707
		} else {
708 709 710 711
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
712 713 714
		}
	}

715
	outp += sprintf(outp, "\n");
L
Len Brown 已提交
716 717
}

718 719
int dump_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
720
{
721 722 723
	int i;
	struct msr_counter *mp;

724
	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
725 726

	if (t) {
727 728 729 730 731 732
		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
			t->cpu_id, t->flags);
		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
		outp += sprintf(outp, "c1: %016llX\n", t->c1);
733

734
		if (DO_BIC(BIC_IRQ))
735
			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
736
		if (DO_BIC(BIC_SMI))
737
			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
738 739 740 741 742

		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, t->counter[i]);
		}
743
	}
L
Len Brown 已提交
744

745
	if (c) {
746 747 748 749 750
		outp += sprintf(outp, "core: %d\n", c->core_id);
		outp += sprintf(outp, "c3: %016llX\n", c->c3);
		outp += sprintf(outp, "c6: %016llX\n", c->c6);
		outp += sprintf(outp, "c7: %016llX\n", c->c7);
		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
751 752 753 754 755

		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, c->counter[i]);
		}
756
		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
757
	}
L
Len Brown 已提交
758

759
	if (p) {
760
		outp += sprintf(outp, "package: %d\n", p->package_id);
761 762 763 764 765 766

		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);

767
		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
768
		if (DO_BIC(BIC_Pkgpc3))
769
			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
770
		if (DO_BIC(BIC_Pkgpc6))
771
			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
772
		if (DO_BIC(BIC_Pkgpc7))
773
			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
774 775 776
		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
777 778 779
		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
780 781 782 783 784 785 786 787 788
		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
		outp += sprintf(outp, "Throttle PKG: %0X\n",
			p->rapl_pkg_perf_status);
		outp += sprintf(outp, "Throttle RAM: %0X\n",
			p->rapl_dram_perf_status);
		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
789 790 791 792 793

		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, p->counter[i]);
		}
794
	}
795 796 797

	outp += sprintf(outp, "\n");

798
	return 0;
L
Len Brown 已提交
799 800
}

L
Len Brown 已提交
801 802 803
/*
 * column formatting convention & formats
 */
804 805
int format_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
806
{
807
	double interval_float, tsc;
808
	char *fmt8;
809 810
	int i;
	struct msr_counter *mp;
811 812
	char *delim = "\t";
	int printed = 0;
L
Len Brown 已提交
813

814 815 816 817 818 819 820 821
	 /* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	 /* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

822 823 824 825 826
	/*if not summary line and --cpu is used */
	if ((t != &average.threads) &&
		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
		return 0;

827
	if (DO_BIC(BIC_USEC)) {
828 829 830 831 832 833 834
		/* on each row, print how many usec each timestamp took to gather */
		struct timeval tv;

		timersub(&t->tv_end, &t->tv_begin, &tv);
		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
	}

835 836 837 838
	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);

L
Len Brown 已提交
839 840
	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;

841 842
	tsc = t->tsc * tsc_tweak;

843 844
	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
845
		if (DO_BIC(BIC_Package))
846
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
847
		if (DO_BIC(BIC_Core))
848
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
849
		if (DO_BIC(BIC_CPU))
850
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
L
Len Brown 已提交
851
	} else {
852
		if (DO_BIC(BIC_Package)) {
853
			if (p)
854
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
855
			else
856
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
857
		}
858
		if (DO_BIC(BIC_Core)) {
859
			if (c)
860
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
861
			else
862
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
863
		}
864
		if (DO_BIC(BIC_CPU))
865
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
L
Len Brown 已提交
866
	}
867

868
	if (DO_BIC(BIC_Avg_MHz))
869
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
870 871
			1.0 / units * t->aperf / interval_float);

872
	if (DO_BIC(BIC_Busy))
873
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
L
Len Brown 已提交
874

875
	if (DO_BIC(BIC_Bzy_MHz)) {
876
		if (has_base_hz)
877
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
878
		else
879
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
880
				tsc / units * t->aperf / t->mperf / interval_float);
881
	}
L
Len Brown 已提交
882

883
	if (DO_BIC(BIC_TSC_MHz))
884
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
L
Len Brown 已提交
885

886
	/* IRQ */
887 888
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
889
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
890
		else
891
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
892
	}
893

894
	/* SMI */
895
	if (DO_BIC(BIC_SMI))
896
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
897

898
	/* Added counters */
899 900 901
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
902
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
903
			else
904
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
905
		} else if (mp->format == FORMAT_DELTA) {
906
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
907
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
908
			else
909
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
910
		} else if (mp->format == FORMAT_PERCENT) {
911
			if (mp->type == COUNTER_USEC)
912
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
913
			else
914
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
915 916 917
		}
	}

918 919
	/* C1 */
	if (DO_BIC(BIC_CPU_c1))
920
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
921 922


923 924 925 926
	/* print per-core data only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		goto done;

927
	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
928
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
929
	if (DO_BIC(BIC_CPU_c6))
930
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
931
	if (DO_BIC(BIC_CPU_c7))
932
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
933

934 935
	/* Mod%c6 */
	if (DO_BIC(BIC_Mod_c6))
936
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
937

938
	if (DO_BIC(BIC_CoreTmp))
939
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
940

941 942 943
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
944
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
945
			else
946
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
947
		} else if (mp->format == FORMAT_DELTA) {
948
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
949
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
950
			else
951
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
952
		} else if (mp->format == FORMAT_PERCENT) {
953
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
954 955 956
		}
	}

957 958 959 960
	/* print per-package data only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		goto done;

961
	/* PkgTmp */
962
	if (DO_BIC(BIC_PkgTmp))
963
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
964

L
Len Brown 已提交
965
	/* GFXrc6 */
966
	if (DO_BIC(BIC_GFX_rc6)) {
967
		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
968
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
969
		} else {
970
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
971 972 973
				p->gfx_rc6_ms / 10.0 / interval_float);
		}
	}
L
Len Brown 已提交
974

L
Len Brown 已提交
975
	/* GFXMHz */
976
	if (DO_BIC(BIC_GFXMHz))
977
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
L
Len Brown 已提交
978

979
	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
980
	if (DO_BIC(BIC_Totl_c0))
981
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
982
	if (DO_BIC(BIC_Any_c0))
983
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
984
	if (DO_BIC(BIC_GFX_c0))
985
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
986
	if (DO_BIC(BIC_CPUGFX))
987
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
988

989
	if (DO_BIC(BIC_Pkgpc2))
990
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
991
	if (DO_BIC(BIC_Pkgpc3))
992
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
993
	if (DO_BIC(BIC_Pkgpc6))
994
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
995
	if (DO_BIC(BIC_Pkgpc7))
996
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
997
	if (DO_BIC(BIC_Pkgpc8))
998
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
999
	if (DO_BIC(BIC_Pkgpc9))
1000
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1001
	if (DO_BIC(BIC_Pkgpc10))
1002
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1003

1004 1005 1006 1007 1008
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);

1009 1010 1011 1012
	/*
 	 * If measurement interval exceeds minimum RAPL Joule Counter range,
 	 * indicate that results are suspect by printing "**" in fraction place.
 	 */
1013
	if (interval_float < rapl_joule_counter_range)
1014
		fmt8 = "%s%.2f";
1015
	else
1016
		fmt8 = "%6.0f**";
1017

1018
	if (DO_BIC(BIC_PkgWatt))
1019
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1020
	if (DO_BIC(BIC_CorWatt))
1021
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1022
	if (DO_BIC(BIC_GFXWatt))
1023
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1024
	if (DO_BIC(BIC_RAMWatt))
1025
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1026
	if (DO_BIC(BIC_Pkg_J))
1027
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1028
	if (DO_BIC(BIC_Cor_J))
1029
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1030
	if (DO_BIC(BIC_GFX_J))
1031
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1032
	if (DO_BIC(BIC_RAM_J))
1033
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1034
	if (DO_BIC(BIC_PKG__))
1035
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1036
	if (DO_BIC(BIC_RAM__))
1037
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1038

1039 1040 1041
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
1042
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1043
			else
1044
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1045
		} else if (mp->format == FORMAT_DELTA) {
1046
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1047
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1048
			else
1049
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1050
		} else if (mp->format == FORMAT_PERCENT) {
1051
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1052 1053 1054
		}
	}

1055
done:
1056 1057
	if (*(outp - 1) != '\n')
		outp += sprintf(outp, "\n");
1058 1059

	return 0;
L
Len Brown 已提交
1060 1061
}

1062
void flush_output_stdout(void)
1063
{
1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
	FILE *filep;

	if (outf == stderr)
		filep = stdout;
	else
		filep = outf;

	fputs(output_buffer, filep);
	fflush(filep);

1074 1075
	outp = output_buffer;
}
1076
void flush_output_stderr(void)
1077
{
1078 1079
	fputs(output_buffer, outf);
	fflush(outf);
1080 1081 1082
	outp = output_buffer;
}
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
L
Len Brown 已提交
1083
{
L
Len Brown 已提交
1084
	static int printed;
L
Len Brown 已提交
1085

L
Len Brown 已提交
1086
	if (!printed || !summary_only)
1087
		print_header("\t");
L
Len Brown 已提交
1088

1089 1090 1091
	if (topo.num_cpus > 1)
		format_counters(&average.threads, &average.cores,
			&average.packages);
L
Len Brown 已提交
1092

L
Len Brown 已提交
1093 1094 1095 1096 1097
	printed = 1;

	if (summary_only)
		return;

1098
	for_all_cpus(format_counters, t, c, p);
L
Len Brown 已提交
1099 1100
}

1101 1102 1103 1104 1105 1106 1107
/*
 * DELTA_WRAP32(new, old)
 *
 * Overwrite "old" with the distance from "old" to "new", for a counter
 * that wraps at 2^32: when "new" is below "old" the counter is taken to
 * have wrapped once.  Equal values yield a delta of 0.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects, and "old" must be an lvalue.  The do/while (0) wrapper
 * makes the macro behave as a single statement (safe inside if/else).
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) >= (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)

1108
int
1109 1110
delta_package(struct pkg_data *new, struct pkg_data *old)
{
1111 1112
	int i;
	struct msr_counter *mp;
1113

1114 1115

	if (DO_BIC(BIC_Totl_c0))
1116
		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1117
	if (DO_BIC(BIC_Any_c0))
1118
		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1119
	if (DO_BIC(BIC_GFX_c0))
1120
		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1121
	if (DO_BIC(BIC_CPUGFX))
1122
		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1123

1124
	old->pc2 = new->pc2 - old->pc2;
1125
	if (DO_BIC(BIC_Pkgpc3))
1126
		old->pc3 = new->pc3 - old->pc3;
1127
	if (DO_BIC(BIC_Pkgpc6))
1128
		old->pc6 = new->pc6 - old->pc6;
1129
	if (DO_BIC(BIC_Pkgpc7))
1130
		old->pc7 = new->pc7 - old->pc7;
1131 1132 1133
	old->pc8 = new->pc8 - old->pc8;
	old->pc9 = new->pc9 - old->pc9;
	old->pc10 = new->pc10 - old->pc10;
1134 1135
	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1136 1137
	old->pkg_temp_c = new->pkg_temp_c;

1138 1139 1140 1141 1142 1143
	/* flag an error when rc6 counter resets/wraps */
	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
		old->gfx_rc6_ms = -1;
	else
		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

L
Len Brown 已提交
1144 1145
	old->gfx_mhz = new->gfx_mhz;

1146 1147 1148 1149 1150 1151
	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
	DELTA_WRAP32(new->energy_cores, old->energy_cores);
	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
	DELTA_WRAP32(new->energy_dram, old->energy_dram);
	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1152

1153 1154 1155 1156 1157 1158 1159
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

1160
	return 0;
1161
}
L
Len Brown 已提交
1162

1163 1164
void
delta_core(struct core_data *new, struct core_data *old)
L
Len Brown 已提交
1165
{
1166 1167 1168
	int i;
	struct msr_counter *mp;

1169 1170 1171
	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
1172
	old->core_temp_c = new->core_temp_c;
1173
	old->mc6_us = new->mc6_us - old->mc6_us;
1174 1175 1176 1177 1178 1179 1180

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
1181
}
L
Len Brown 已提交
1182

1183 1184 1185
/*
 * old = new - old
 */
1186
int
1187 1188 1189
delta_thread(struct thread_data *new, struct thread_data *old,
	struct core_data *core_delta)
{
1190 1191 1192
	int i;
	struct msr_counter *mp;

1193 1194 1195 1196 1197 1198 1199 1200 1201
	/*
	 * the timestamps from start of measurement interval are in "old"
	 * the timestamp from end of measurement interval are in "new"
	 * over-write old w/ new so we can print end of interval values
	 */

	old->tv_begin = new->tv_begin;
	old->tv_end = new->tv_end;

1202 1203 1204
	old->tsc = new->tsc - old->tsc;

	/* check for TSC < 1 Mcycles over interval */
1205 1206 1207 1208
	if (old->tsc < (1000 * 1000))
		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
		     "You can disable all c-states by booting with \"idle=poll\"\n"
		     "or just the deep ones with \"processor.max_cstate=1\"");
L
Len Brown 已提交
1209

1210
	old->c1 = new->c1 - old->c1;
L
Len Brown 已提交
1211

1212
	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1213 1214 1215 1216
		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
			old->aperf = new->aperf - old->aperf;
			old->mperf = new->mperf - old->mperf;
		} else {
1217
			return -1;
L
Len Brown 已提交
1218
		}
1219
	}
L
Len Brown 已提交
1220 1221


1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
	if (use_c1_residency_msr) {
		/*
		 * Some models have a dedicated C1 residency MSR,
		 * which should be more accurate than the derivation below.
		 */
	} else {
		/*
		 * As counter collection is not atomic,
		 * it is possible for mperf's non-halted cycles + idle states
		 * to exceed TSC's all cycles: show c1 = 0% in that case.
		 */
1233
		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1234 1235 1236
			old->c1 = 0;
		else {
			/* normal case, derive c1 */
1237
			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1238
				- core_delta->c6 - core_delta->c7;
1239
		}
1240
	}
1241

1242
	if (old->mperf == 0) {
1243 1244
		if (debug > 1)
			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1245
		old->mperf = 1;	/* divide by 0 protection */
L
Len Brown 已提交
1246
	}
1247

1248
	if (DO_BIC(BIC_IRQ))
1249 1250
		old->irq_count = new->irq_count - old->irq_count;

1251
	if (DO_BIC(BIC_SMI))
1252
		old->smi_count = new->smi_count - old->smi_count;
1253

1254 1255 1256 1257 1258 1259
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
1260
	return 0;
1261 1262 1263 1264 1265 1266
}

int delta_cpu(struct thread_data *t, struct core_data *c,
	struct pkg_data *p, struct thread_data *t2,
	struct core_data *c2, struct pkg_data *p2)
{
1267 1268
	int retval = 0;

1269 1270 1271 1272 1273
	/* calculate core delta only for 1st thread in core */
	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
		delta_core(c, c2);

	/* always calculate thread delta */
1274 1275 1276
	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
	if (retval)
		return retval;
1277 1278 1279

	/* calculate package delta only for 1st core in package */
	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1280
		retval = delta_package(p, p2);
1281

1282
	return retval;
L
Len Brown 已提交
1283 1284
}

1285 1286
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
1287 1288 1289
	int i;
	struct msr_counter  *mp;

1290 1291 1292 1293 1294
	t->tv_begin.tv_sec = 0;
	t->tv_begin.tv_usec = 0;
	t->tv_end.tv_sec = 0;
	t->tv_end.tv_usec = 0;

1295 1296 1297 1298 1299
	t->tsc = 0;
	t->aperf = 0;
	t->mperf = 0;
	t->c1 = 0;

1300 1301 1302
	t->irq_count = 0;
	t->smi_count = 0;

1303 1304 1305 1306 1307 1308
	/* tells format_counters to dump all fields from this set */
	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;

	c->c3 = 0;
	c->c6 = 0;
	c->c7 = 0;
1309
	c->mc6_us = 0;
1310
	c->core_temp_c = 0;
1311

1312 1313 1314 1315 1316
	p->pkg_wtd_core_c0 = 0;
	p->pkg_any_core_c0 = 0;
	p->pkg_any_gfxe_c0 = 0;
	p->pkg_both_core_gfxe_c0 = 0;

1317
	p->pc2 = 0;
1318
	if (DO_BIC(BIC_Pkgpc3))
1319
		p->pc3 = 0;
1320
	if (DO_BIC(BIC_Pkgpc6))
1321
		p->pc6 = 0;
1322
	if (DO_BIC(BIC_Pkgpc7))
1323
		p->pc7 = 0;
1324 1325 1326
	p->pc8 = 0;
	p->pc9 = 0;
	p->pc10 = 0;
1327 1328
	p->cpu_lpi = 0;
	p->sys_lpi = 0;
1329 1330 1331 1332 1333 1334 1335 1336

	p->energy_pkg = 0;
	p->energy_dram = 0;
	p->energy_cores = 0;
	p->energy_gfx = 0;
	p->rapl_pkg_perf_status = 0;
	p->rapl_dram_perf_status = 0;
	p->pkg_temp_c = 0;
L
Len Brown 已提交
1337

L
Len Brown 已提交
1338
	p->gfx_rc6_ms = 0;
L
Len Brown 已提交
1339
	p->gfx_mhz = 0;
1340 1341 1342 1343 1344 1345 1346 1347
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
		t->counter[i] = 0;

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
		c->counter[i] = 0;

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
		p->counter[i] = 0;
1348 1349 1350
}
int sum_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
1351
{
1352 1353 1354
	int i;
	struct msr_counter *mp;

1355 1356 1357 1358 1359 1360 1361
	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = t->tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

1362 1363 1364 1365
	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;
L
Len Brown 已提交
1366

1367 1368 1369
	average.threads.irq_count += t->irq_count;
	average.threads.smi_count += t->smi_count;

1370 1371 1372 1373 1374 1375
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

1376 1377 1378
	/* sum per-core values only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;
L
Len Brown 已提交
1379

1380 1381 1382
	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
1383
	average.cores.mc6_us += c->mc6_us;
1384

1385 1386
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);

1387 1388 1389 1390 1391 1392
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

1393 1394 1395 1396
	/* sum per-pkg values only for 1st core in pkg */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

1397
	if (DO_BIC(BIC_Totl_c0))
1398
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1399
	if (DO_BIC(BIC_Any_c0))
1400
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1401
	if (DO_BIC(BIC_GFX_c0))
1402
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1403
	if (DO_BIC(BIC_CPUGFX))
1404 1405
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

1406
	average.packages.pc2 += p->pc2;
1407
	if (DO_BIC(BIC_Pkgpc3))
1408
		average.packages.pc3 += p->pc3;
1409
	if (DO_BIC(BIC_Pkgpc6))
1410
		average.packages.pc6 += p->pc6;
1411
	if (DO_BIC(BIC_Pkgpc7))
1412
		average.packages.pc7 += p->pc7;
1413 1414 1415
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;
1416

1417 1418 1419
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

1420 1421 1422 1423 1424
	average.packages.energy_pkg += p->energy_pkg;
	average.packages.energy_dram += p->energy_dram;
	average.packages.energy_cores += p->energy_cores;
	average.packages.energy_gfx += p->energy_gfx;

L
Len Brown 已提交
1425
	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
L
Len Brown 已提交
1426 1427
	average.packages.gfx_mhz = p->gfx_mhz;

1428 1429 1430 1431
	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1432 1433 1434 1435 1436 1437

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.packages.counter[i] += p->counter[i];
	}
1438 1439 1440 1441 1442 1443 1444 1445 1446
	return 0;
}
/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 */
void compute_average(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
1447 1448 1449
	int i;
	struct msr_counter *mp;

1450 1451 1452 1453 1454 1455 1456 1457 1458
	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	average.threads.tsc /= topo.num_cpus;
	average.threads.aperf /= topo.num_cpus;
	average.threads.mperf /= topo.num_cpus;
	average.threads.c1 /= topo.num_cpus;

1459 1460 1461
	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;

1462 1463 1464
	average.cores.c3 /= topo.num_cores;
	average.cores.c6 /= topo.num_cores;
	average.cores.c7 /= topo.num_cores;
1465
	average.cores.mc6_us /= topo.num_cores;
1466

1467
	if (DO_BIC(BIC_Totl_c0))
1468
		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1469
	if (DO_BIC(BIC_Any_c0))
1470
		average.packages.pkg_any_core_c0 /= topo.num_packages;
1471
	if (DO_BIC(BIC_GFX_c0))
1472
		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1473
	if (DO_BIC(BIC_CPUGFX))
1474 1475
		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;

1476
	average.packages.pc2 /= topo.num_packages;
1477
	if (DO_BIC(BIC_Pkgpc3))
1478
		average.packages.pc3 /= topo.num_packages;
1479
	if (DO_BIC(BIC_Pkgpc6))
1480
		average.packages.pc6 /= topo.num_packages;
1481
	if (DO_BIC(BIC_Pkgpc7))
1482
		average.packages.pc7 /= topo.num_packages;
1483 1484 1485 1486

	average.packages.pc8 /= topo.num_packages;
	average.packages.pc9 /= topo.num_packages;
	average.packages.pc10 /= topo.num_packages;
1487 1488 1489 1490

	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1491 1492 1493
		if (mp->type == COUNTER_ITEMS) {
			if (average.threads.counter[i] > 9999999)
				sums_need_wide_columns = 1;
1494
			continue;
1495
		}
1496 1497 1498 1499 1500
		average.threads.counter[i] /= topo.num_cpus;
	}
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1501 1502 1503 1504
		if (mp->type == COUNTER_ITEMS) {
			if (average.cores.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
1505 1506 1507 1508 1509
		average.cores.counter[i] /= topo.num_cores;
	}
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1510 1511 1512 1513
		if (mp->type == COUNTER_ITEMS) {
			if (average.packages.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
1514 1515
		average.packages.counter[i] /= topo.num_packages;
	}
L
Len Brown 已提交
1516 1517
}

1518
/*
 * Execute the "rdtsc" instruction on the current CPU and return its
 * result as one 64-bit value (EDX supplies the upper 32 bits, EAX the
 * lower 32 bits).
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo, hi;
	unsigned long long value;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

	value = (unsigned long long)hi;
	value <<= 32;
	value |= lo;

	return value;
}
1526

1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565
/*
 * fopen_or_die()
 *
 * Open "path" with the given fopen() mode.  On failure the program
 * exits with status 1 via err(); on success the stream is returned.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at "path", parse one integer from it and return that
 * value.  Exits with status 1 when the file cannot be opened or no
 * number can be parsed.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned long long destination */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}

int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
	if (mp->msr_num != 0) {
		if (get_msr(cpu, mp->msr_num, counterp))
			return -1;
	} else {
1566
		char path[128 + PATH_BYTES];
1567 1568 1569 1570 1571 1572 1573 1574 1575

		if (mp->flags & SYSFS_PERCPU) {
			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
				 cpu, mp->path);

			*counterp = snapshot_sysfs_counter(path);
		} else {
			*counterp = snapshot_sysfs_counter(mp->path);
		}
1576 1577 1578 1579 1580
	}

	return 0;
}

1581 1582 1583 1584 1585 1586 1587 1588
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
1589
	unsigned long long msr;
1590
	int aperf_mperf_retry_count = 0;
1591 1592
	struct msr_counter *mp;
	int i;
1593

1594 1595 1596

	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

1597
	if (cpu_migrate(cpu)) {
1598
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1599
		return -1;
1600
	}
1601

1602
retry:
1603 1604
	t->tsc = rdtsc();	/* we are running on local CPU of interest */

1605
	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629
		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;

		/*
		 * The TSC, APERF and MPERF must be read together for
		 * APERF/MPERF and MPERF/TSC to give accurate results.
		 *
		 * Unfortunately, APERF and MPERF are read by
		 * individual system call, so delays may occur
		 * between them.  If the time to read them
		 * varies by a large amount, we re-read them.
		 */

		/*
		 * This initial dummy APERF read has been seen to
		 * reduce jitter in the subsequent reads.
		 */

		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		t->tsc = rdtsc();	/* re-read close to APERF */

		tsc_before = t->tsc;

1630
		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1631
			return -3;
1632 1633 1634

		tsc_between = rdtsc();

1635
		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1636
			return -4;
1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656

		tsc_after = rdtsc();

		aperf_time = tsc_between - tsc_before;
		mperf_time = tsc_after - tsc_between;

		/*
		 * If the system call latency to read APERF and MPERF
		 * differ by more than 2x, then try again.
		 */
		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
			aperf_mperf_retry_count++;
			if (aperf_mperf_retry_count < 5)
				goto retry;
			else
				warnx("cpu%d jitter %lld %lld",
					cpu, aperf_time, mperf_time);
		}
		aperf_mperf_retry_count = 0;

1657 1658
		t->aperf = t->aperf * aperf_mperf_multiplier;
		t->mperf = t->mperf * aperf_mperf_multiplier;
1659 1660
	}

1661
	if (DO_BIC(BIC_IRQ))
1662
		t->irq_count = irqs_per_cpu[cpu];
1663
	if (DO_BIC(BIC_SMI)) {
1664 1665 1666 1667
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		t->smi_count = msr & 0xFFFFFFFF;
	}
1668
	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1669 1670 1671 1672
		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
			return -6;
	}

1673
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1674
		if (get_mp(cpu, mp, &t->counter[i]))
1675 1676 1677
			return -10;
	}

1678 1679
	/* collect core counters only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1680
		goto done;
1681

1682
	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1683 1684
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
1685 1686
	}

1687
	if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1688 1689
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
1690 1691 1692
	} else if (do_knl_cstates) {
		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
			return -7;
1693 1694
	}

1695
	if (DO_BIC(BIC_CPU_c7))
1696 1697 1698
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;

1699 1700 1701 1702
	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

1703
	if (DO_BIC(BIC_CoreTmp)) {
1704 1705 1706 1707 1708
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

1709
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1710
		if (get_mp(cpu, mp, &c->counter[i]))
1711 1712
			return -10;
	}
1713

1714 1715
	/* collect package counters only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1716
		goto done;
1717

1718
	if (DO_BIC(BIC_Totl_c0)) {
1719 1720
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
1721 1722
	}
	if (DO_BIC(BIC_Any_c0)) {
1723 1724
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
1725 1726
	}
	if (DO_BIC(BIC_GFX_c0)) {
1727 1728
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
1729 1730
	}
	if (DO_BIC(BIC_CPUGFX)) {
1731 1732 1733
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}
1734
	if (DO_BIC(BIC_Pkgpc3))
1735 1736
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
1737
	if (DO_BIC(BIC_Pkgpc6)) {
1738 1739 1740 1741 1742 1743 1744 1745 1746
		if (do_slm_cstates) {
			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		} else {
			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		}
	}

1747
	if (DO_BIC(BIC_Pkgpc2))
1748 1749
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
1750
	if (DO_BIC(BIC_Pkgpc7))
1751 1752
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
1753
	if (DO_BIC(BIC_Pkgpc8))
1754 1755
		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
			return -13;
1756
	if (DO_BIC(BIC_Pkgpc9))
1757 1758
		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
			return -13;
1759
	if (DO_BIC(BIC_Pkgpc10))
1760 1761
		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
			return -13;
1762

1763 1764 1765 1766 1767
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

1768 1769 1770 1771 1772
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
1773
	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
			return -14;
		p->energy_cores = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
			return -15;
		p->energy_dram = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
			return -16;
		p->energy_gfx = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_PKG_PERF_STATUS) {
		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
			return -16;
		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
			return -16;
		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
	}
1798
	if (DO_BIC(BIC_PkgTmp)) {
1799 1800 1801 1802
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}
L
Len Brown 已提交
1803

1804
	if (DO_BIC(BIC_GFX_rc6))
L
Len Brown 已提交
1805 1806
		p->gfx_rc6_ms = gfx_cur_rc6_ms;

1807
	if (DO_BIC(BIC_GFXMHz))
L
Len Brown 已提交
1808 1809
		p->gfx_mhz = gfx_cur_mhz;

1810
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1811
		if (get_mp(cpu, mp, &p->counter[i]))
1812 1813
			return -10;
	}
1814 1815
done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);
1816

1817
	return 0;
L
Len Brown 已提交
1818 1819
}

1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

/* Currently decoded package C-state limit, one of the PCL* codes above. */
int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* code, indexed by the code itself.
 * Designated initializers tie every string to its macro so the table
 * cannot drift out of step with the numbering (PCLUKN is 0, PCLRSV is 1).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};
1845

1846 1847 1848
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1849
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
1850
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1851
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1852
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1853
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1854

1855 1856 1857 1858 1859 1860 1861

/*
 * Recompute tsc_tweak as the quotient base_hz / tsc_hz.
 */
static void
calculate_tsc_tweak(void)
{
	tsc_tweak = base_hz / tsc_hz;
}

1862 1863
static void
dump_nhm_platform_info(void)
L
Len Brown 已提交
1864 1865 1866 1867
{
	unsigned long long msr;
	unsigned int ratio;

1868
	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
L
Len Brown 已提交
1869

1870
	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1871

L
Len Brown 已提交
1872
	ratio = (msr >> 40) & 0xFF;
1873
	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
L
Len Brown 已提交
1874 1875 1876
		ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
1877
	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
L
Len Brown 已提交
1878 1879
		ratio, bclk, ratio * bclk);

1880
	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1881
	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1882
		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1883

1884 1885 1886 1887 1888 1889 1890 1891 1892
	return;
}

static void
dump_hsw_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;

1893
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1894

1895
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1896 1897 1898

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
1899
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1900 1901 1902 1903
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
1904
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1905 1906 1907 1908 1909 1910 1911 1912 1913
			ratio, bclk, ratio * bclk);
	return;
}

static void
dump_ivt_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;
1914

1915
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1916

1917
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1918 1919 1920

	ratio = (msr >> 56) & 0xFF;
	if (ratio)
1921
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1922 1923 1924 1925
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 48) & 0xFF;
	if (ratio)
1926
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1927 1928 1929 1930
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 40) & 0xFF;
	if (ratio)
1931
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1932 1933 1934 1935
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 32) & 0xFF;
	if (ratio)
1936
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1937 1938 1939 1940
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 24) & 0xFF;
	if (ratio)
1941
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1942 1943 1944 1945
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0xFF;
	if (ratio)
1946
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1947 1948 1949 1950
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
1951
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1952 1953 1954 1955
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
1956
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
1957
			ratio, bclk, ratio * bclk);
1958 1959
	return;
}
1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973
/*
 * Return 1 when the CPU model is one of those listed below (Goldmont,
 * Skylake-X, Denverton) on a genuine Intel part, otherwise 0.
 * dump_turbo_ratio_limits() uses the result to decide whether to read
 * group sizes from MSR_TURBO_RATIO_LIMIT1.
 * The family argument is accepted but not examined.
 */
int has_turbo_ratio_group_limits(int family, int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_SKYLAKE_X:
	case INTEL_FAM6_ATOM_DENVERTON:
		return 1;
	default:
		return 0;
	}
}
1974

1975
static void
1976
dump_turbo_ratio_limits(int family, int model)
1977
{
1978 1979
	unsigned long long msr, core_counts;
	unsigned int ratio, group_size;
L
Len Brown 已提交
1980

1981
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1982
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1983

1984 1985 1986 1987 1988 1989 1990
	if (has_turbo_ratio_group_limits(family, model)) {
		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
	} else {
		core_counts = 0x0807060504030201;
	}

1991
	ratio = (msr >> 56) & 0xFF;
1992
	group_size = (core_counts >> 56) & 0xFF;
1993
	if (ratio)
1994 1995
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
1996 1997

	ratio = (msr >> 48) & 0xFF;
1998
	group_size = (core_counts >> 48) & 0xFF;
1999
	if (ratio)
2000 2001
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2002 2003

	ratio = (msr >> 40) & 0xFF;
2004
	group_size = (core_counts >> 40) & 0xFF;
2005
	if (ratio)
2006 2007
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2008 2009

	ratio = (msr >> 32) & 0xFF;
2010
	group_size = (core_counts >> 32) & 0xFF;
2011
	if (ratio)
2012 2013
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2014

L
Len Brown 已提交
2015
	ratio = (msr >> 24) & 0xFF;
2016
	group_size = (core_counts >> 24) & 0xFF;
L
Len Brown 已提交
2017
	if (ratio)
2018 2019
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2020 2021

	ratio = (msr >> 16) & 0xFF;
2022
	group_size = (core_counts >> 16) & 0xFF;
L
Len Brown 已提交
2023
	if (ratio)
2024 2025
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2026 2027

	ratio = (msr >> 8) & 0xFF;
2028
	group_size = (core_counts >> 8) & 0xFF;
L
Len Brown 已提交
2029
	if (ratio)
2030 2031
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2032 2033

	ratio = (msr >> 0) & 0xFF;
2034
	group_size = (core_counts >> 0) & 0xFF;
L
Len Brown 已提交
2035
	if (ratio)
2036 2037
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2038 2039
	return;
}
2040

2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088
/*
 * Print MSR_ATOM_CORE_RATIOS and MSR_ATOM_CORE_TURBO_RATIOS of base_cpu
 * to outf (low 32 bits only) and decode their 6-bit ratio fields.
 *
 * MSR_ATOM_CORE_RATIOS:       bits 5:0 minimum, 13:8 LFM, 21:16 base.
 * MSR_ATOM_CORE_TURBO_RATIOS: bits 29:24, 21:16, 13:8, 5:0 are reported
 *                             for 4, 3, 2 and 1 active cores.
 * A zero ratio is not printed.
 */
static void
dump_atom_turbo_ratio_limits(void)
{
	unsigned long long core_msr, turbo_msr;
	unsigned int r;

	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &core_msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, core_msr & 0xFFFFFFFF);

	r = (core_msr >> 0) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
			r, bclk, r * bclk);
	}

	r = (core_msr >> 8) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
			r, bclk, r * bclk);
	}

	r = (core_msr >> 16) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
			r, bclk, r * bclk);
	}

	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &turbo_msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, turbo_msr & 0xFFFFFFFF);

	r = (turbo_msr >> 24) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
			r, bclk, r * bclk);
	}

	r = (turbo_msr >> 16) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
			r, bclk, r * bclk);
	}

	r = (turbo_msr >> 8) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
			r, bclk, r * bclk);
	}

	r = (turbo_msr >> 0) & 0x3F;
	if (r != 0) {
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
			r, bclk, r * bclk);
	}
}

2089 2090 2091
static void
dump_knl_turbo_ratio_limits(void)
{
2092 2093
	const unsigned int buckets_no = 7;

2094
	unsigned long long msr;
2095 2096 2097 2098
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];
2099

2100
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2101

2102
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2103
		base_cpu, msr);
2104 2105 2106

	/**
	 * Turbo encoding in KNL is as follows:
2107 2108
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */
2127 2128 2129 2130 2131 2132

	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;
	ratio[b_nr] = (msr >> 8) & 0xFF;

	for (i = 16; i < 64; i += 8) {
2133
		delta_cores = (msr >> i) & 0x1F;
2134 2135 2136 2137 2138
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
2139
	}
2140 2141 2142

	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2143
			fprintf(outf,
2144
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2145
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2146 2147
}

2148 2149 2150 2151 2152
static void
dump_nhm_cst_cfg(void)
{
	unsigned long long msr;

2153
	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2154

2155
	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2156

2157
	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2158 2159 2160 2161 2162
		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
		(msr & (1 << 15)) ? "" : "UN",
2163
		(unsigned int)msr & 0xF,
2164
		pkg_cstate_limit_strings[pkg_cstate_limit]);
2165 2166 2167 2168 2169 2170 2171 2172 2173

#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
	if (has_automatic_cstate_conversion) {
		fprintf(outf, ", automatic c-state conversion=%s",
			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
	}

	fprintf(outf, ")\n");

2174
	return;
L
Len Brown 已提交
2175 2176
}

2177 2178 2179 2180 2181 2182
static void
dump_config_tdp(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2183
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2184
	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2185 2186

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2187
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2188
	if (msr) {
2189 2190 2191 2192
		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2193
	}
2194
	fprintf(outf, ")\n");
2195 2196

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2197
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2198
	if (msr) {
2199 2200 2201 2202
		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2203
	}
2204
	fprintf(outf, ")\n");
2205 2206

	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2207
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2208
	if ((msr) & 0x3)
2209 2210 2211
		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
2212

2213
	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2214
	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2215
	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2216 2217
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
2218
}
2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259

/*
 * Multiplier applied to the 10-bit IRTL time value, selected by the
 * register's time-unit field; the product is printed as nanoseconds.
 * The last two encodings map to 0.
 */
unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * Read one package C-state IRTL register of base_cpu and print its raw
 * value, bit 15 as the valid flag, and the time limit (bits 9:0 scaled
 * by the unit selected in bits 12:10).
 * The time-unit field is three bits wide, matching the eight entries of
 * irtl_time_units[], so it is masked with 0x7.
 */
static void print_one_irtl(unsigned int msr_offset, const char *msr_name)
{
	unsigned long long msr;
	unsigned long long time_value;
	unsigned int time_unit;

	get_msr(base_cpu, msr_offset, &msr);

	time_value = msr & 0x3FF;
	time_unit = (msr >> 10) & 0x7;

	fprintf(outf, "cpu%d: %s: 0x%08llx (", base_cpu, msr_name, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(long long)(time_value * irtl_time_units[time_unit]));
}

/*
 * Print the package C-state interrupt response time limit MSRs to outf:
 * PKGC3, PKGC6 and PKGC7 always, and PKGC8, PKGC9 and PKGC10 as well
 * when do_irtl_hsw is set.
 */
void print_irtl(void)
{
	print_one_irtl(MSR_PKGC3_IRTL, "MSR_PKGC3_IRTL");
	print_one_irtl(MSR_PKGC6_IRTL, "MSR_PKGC6_IRTL");
	print_one_irtl(MSR_PKGC7_IRTL, "MSR_PKGC7_IRTL");

	if (!do_irtl_hsw)
		return;

	print_one_irtl(MSR_PKGC8_IRTL, "MSR_PKGC8_IRTL");
	print_one_irtl(MSR_PKGC9_IRTL, "MSR_PKGC9_IRTL");
	print_one_irtl(MSR_PKGC10_IRTL, "MSR_PKGC10_IRTL");
}
2260 2261 2262 2263
void free_fd_percpu(void)
{
	int i;

2264
	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2265 2266 2267 2268 2269
		if (fd_percpu[i] != 0)
			close(fd_percpu[i]);
	}

	free(fd_percpu);
2270 2271
}

2272
void free_all_buffers(void)
L
Len Brown 已提交
2273
{
2274 2275
	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
2276
	cpu_present_setsize = 0;
L
Len Brown 已提交
2277

2278 2279 2280
	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;
L
Len Brown 已提交
2281

2282 2283 2284
	free(thread_even);
	free(core_even);
	free(package_even);
L
Len Brown 已提交
2285

2286 2287 2288
	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;
L
Len Brown 已提交
2289

2290 2291 2292
	free(thread_odd);
	free(core_odd);
	free(package_odd);
L
Len Brown 已提交
2293

2294 2295 2296
	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;
L
Len Brown 已提交
2297

2298 2299 2300
	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;
2301 2302

	free_fd_percpu();
2303 2304 2305

	free(irq_column_2_cpu);
	free(irqs_per_cpu);
L
Len Brown 已提交
2306 2307
}

2308

2309
/*
2310
 * Parse a file containing a single int.
2311
 */
2312
int parse_int_file(const char *fmt, ...)
L
Len Brown 已提交
2313
{
2314 2315
	va_list args;
	char path[PATH_MAX];
2316
	FILE *filep;
2317
	int value;
L
Len Brown 已提交
2318

2319 2320 2321
	va_start(args, fmt);
	vsnprintf(path, sizeof(path), fmt, args);
	va_end(args);
2322
	filep = fopen_or_die(path, "r");
2323 2324
	if (fscanf(filep, "%d", &value) != 1)
		err(1, "%s: failed to parse number from file", path);
2325
	fclose(filep);
2326 2327 2328 2329
	return value;
}

/*
2330 2331 2332
 * get_cpu_position_in_core(cpu)
 * return the position of the CPU among its HT siblings in the core
 * return -1 if the sibling is not in list
2333
 */
2334
int get_cpu_position_in_core(int cpu)
2335
{
2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364
	char path[64];
	FILE *filep;
	int this_cpu;
	char character;
	int i;

	sprintf(path,
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
		cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}

	for (i = 0; i < topo.num_threads_per_core; i++) {
		fscanf(filep, "%d", &this_cpu);
		if (this_cpu == cpu) {
			fclose(filep);
			return i;
		}

		/* Account for no separator after last thread*/
		if (i != (topo.num_threads_per_core - 1))
			fscanf(filep, "%c", &character);
	}

	fclose(filep);
	return -1;
L
Len Brown 已提交
2365 2366
}

2367 2368 2369 2370 2371
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 *
 * "First" means the cpu number equals the leading integer of the cpu's
 * core_siblings_list file.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}

/*
 * Return the integer stored in the cpu's topology/physical_package_id
 * sysfs file.
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}

/*
 * Return the integer stored in the cpu's topology/core_id sysfs file.
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}

2386 2387 2388 2389
/*
 * Return the number of hardware threads sharing a core with the given
 * cpu, as listed in its thread_siblings_list file.
 *
 * file format:
 * A ',' separated or '-' separated set of numbers
 * (eg 1-2 or 1,3,4,5)
 *
 * Every comma-separated item is either a single cpu number or an
 * inclusive range "first-last"; the result is the total number of cpus
 * named.  A list holding a single cpu therefore yields 1, and a range
 * such as 0-3 yields 4.  An unreadable or malformed list yields 1.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	char *pos;
	int count = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	if (fgets(str, sizeof(str), filep) == NULL)
		str[0] = '\0';

	fclose(filep);

	pos = str;
	while (*pos != '\0') {
		char *end;
		long first, last;

		first = strtol(pos, &end, 10);
		if (end == pos)
			break;		/* no number here: stop parsing */
		last = first;
		pos = end;

		if (*pos == '-') {
			last = strtol(pos + 1, &end, 10);
			if (end == pos + 1)
				break;	/* dangling '-' */
			pos = end;
		}

		if (last >= first)
			count += (int)(last - first + 1);

		if (*pos != ',')
			break;		/* newline or end of list */
		pos++;
	}

	/* a cpu is always at least its own sibling */
	return count > 0 ? count : 1;
}

L
Len Brown 已提交
2417
/*
2418 2419
 * run func(thread, core, package) in topology order
 * skip non-present cpus
L
Len Brown 已提交
2420 2421
 */

2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465
int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
	struct pkg_data *, struct thread_data *, struct core_data *,
	struct pkg_data *), struct thread_data *thread_base,
	struct core_data *core_base, struct pkg_data *pkg_base,
	struct thread_data *thread_base2, struct core_data *core_base2,
	struct pkg_data *pkg_base2)
{
	int retval, pkg_no, core_no, thread_no;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
			for (thread_no = 0; thread_no <
				topo.num_threads_per_core; ++thread_no) {
				struct thread_data *t, *t2;
				struct core_data *c, *c2;
				struct pkg_data *p, *p2;

				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);

				if (cpu_is_not_present(t->cpu_id))
					continue;

				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);

				c = GET_CORE(core_base, core_no, pkg_no);
				c2 = GET_CORE(core_base2, core_no, pkg_no);

				p = GET_PKG(pkg_base, pkg_no);
				p2 = GET_PKG(pkg_base2, pkg_no);

				retval = func(t, c, p, t2, c2, p2);
				if (retval)
					return retval;
			}
		}
	}
	return 0;
}

/*
 * run func(cpu) on every cpu in /proc/stat
 * return max_cpu number
 */
int for_all_proc_cpus(int (func)(int))
L
Len Brown 已提交
2466 2467
{
	FILE *fp;
2468
	int cpu_num;
L
Len Brown 已提交
2469 2470
	int retval;

2471
	fp = fopen_or_die(proc_stat, "r");
L
Len Brown 已提交
2472 2473

	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2474 2475
	if (retval != 0)
		err(1, "%s: failed to parse format", proc_stat);
L
Len Brown 已提交
2476

2477 2478
	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
L
Len Brown 已提交
2479 2480 2481
		if (retval != 1)
			break;

2482 2483 2484 2485 2486
		retval = func(cpu_num);
		if (retval) {
			fclose(fp);
			return(retval);
		}
L
Len Brown 已提交
2487 2488
	}
	fclose(fp);
2489
	return 0;
L
Len Brown 已提交
2490 2491 2492 2493
}

/*
 * Throw away all buffers, allocate them again and report the resulting
 * cpu count on stdout.  Called by turbostat_loop() whenever it has to
 * restart its measurement.
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();

	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}

2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512
/*
 * Set topo.max_cpu_num from the width of cpu0's thread_siblings mask:
 * every comma-separated hex word read from the file accounts for 32 cpu
 * numbers, and the result is the highest (0-based) number that fits.
 */
void set_max_cpu_num(void)
{
	FILE *filep;
	unsigned long word;
	int mask_words = 0;

	filep = fopen_or_die(
			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
			"r");
	while (fscanf(filep, "%lx,", &word) == 1)
		mask_words++;
	fclose(filep);

	topo.max_cpu_num = mask_words * 32 - 1; /* 0 based */
}
2513

L
Len Brown 已提交
2514
/*
2515 2516
 * count_cpus()
 * remember the last one seen, it will be the max
L
Len Brown 已提交
2517
 */
2518
int count_cpus(int cpu)
L
Len Brown 已提交
2519
{
2520
	topo.num_cpus++;
2521 2522 2523 2524 2525
	return 0;
}
/*
 * Callback that records cpu as a member of cpu_present_set.
 * Always returns 0.
 */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);

	return 0;
}

2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591
/*
 * snapshot_proc_interrupts()
 *
 * read and record summary of /proc/interrupts
 *
 * The file is opened once and rewound on later calls.  The header line
 * maps each column to a cpu number (irq_column_2_cpu[]) and clears that
 * cpu's total; every following line adds its per-column counts into
 * irqs_per_cpu[].
 *
 * return 1 if config change requires a restart, else return 0
 * (a restart is requested when the header names a cpu number above
 * topo.max_cpu_num)
 */
int snapshot_proc_interrupts(void)
{
	static FILE *fp;
	int column, retval;

	if (fp == NULL)
		fp = fopen_or_die("/proc/interrupts", "r");
	else
		rewind(fp);

	/* read 1st line of /proc/interrupts to get cpu* name for each column */
	for (column = 0; column < topo.num_cpus; ++column) {
		int cpu_number;

		retval = fscanf(fp, " CPU%d", &cpu_number);
		if (retval != 1)
			break;

		if (cpu_number > topo.max_cpu_num) {
			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
			return 1;
		}

		irq_column_2_cpu[column] = cpu_number;
		irqs_per_cpu[cpu_number] = 0;
	}

	/* read /proc/interrupt count lines and sum up irqs per cpu */
	while (1) {
		char buf[64];
		int c;

		/* field width keeps an over-long token from overrunning buf */
		retval = fscanf(fp, " %63s:", buf);	/* flush irq# "N:" */
		if (retval != 1)
			break;

		/* read the count per cpu */
		for (column = 0; column < topo.num_cpus; ++column) {

			int cpu_number, irq_count;

			retval = fscanf(fp, " %d", &irq_count);
			if (retval != 1)
				break;

			cpu_number = irq_column_2_cpu[column];
			irqs_per_cpu[cpu_number] += irq_count;

		}

		/* flush interrupt description; stop at EOF rather than spin */
		do {
			c = getc(fp);
		} while (c != '\n' && c != EOF);

		if (c == EOF)
			break;
	}
	return 0;
}
L
Len Brown 已提交
2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614
/*
 * snapshot_gfx_rc6_ms()
 *
 * record snapshot of
 * /sys/class/drm/card0/power/rc6_residency_ms
 * into gfx_cur_rc6_ms.
 *
 * Exits through err(1, ...) if no number can be read.
 * As written it always returns 0.
 */
int snapshot_gfx_rc6_ms(void)
{
	FILE *fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");

	if (fscanf(fp, "%lld", &gfx_cur_rc6_ms) != 1)
		err(1, "GFX rc6");

	fclose(fp);

	return 0;
}
L
Len Brown 已提交
2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629
/*
 * snapshot_gfx_mhz()
 *
 * record snapshot of
 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_gfx_mhz(void)
{
	static FILE *fp;
	int retval;

	if (fp == NULL)
		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2630
	else {
L
Len Brown 已提交
2631
		rewind(fp);
2632 2633
		fflush(fp);
	}
L
Len Brown 已提交
2634 2635 2636 2637 2638 2639 2640

	retval = fscanf(fp, "%d", &gfx_cur_mhz);
	if (retval != 1)
		err(1, "GFX MHz");

	return 0;
}
2641

2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687
/*
 * snapshot_cpu_lpi_us()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
 * into cpuidle_cur_cpu_lpi_us.
 *
 * Exits through err(1, ...) if no number can be read.
 * As written it always returns 0.
 */
int snapshot_cpu_lpi_us(void)
{
	FILE *fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");

	if (fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us) != 1)
		err(1, "CPU LPI");

	fclose(fp);

	return 0;
}
/*
 * snapshot_sys_lpi_us()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
 * into cpuidle_cur_sys_lpi_us.
 *
 * Exits through err(1, ...) if no number can be read.
 * As written it always returns 0.
 */
int snapshot_sys_lpi_us(void)
{
	FILE *fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");

	if (fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us) != 1)
		err(1, "SYS LPI");

	fclose(fp);

	return 0;
}
2688 2689 2690 2691 2692 2693 2694
/*
 * snapshot /proc and /sys files
 *
 * return 1 if configuration restart needed, else return 0
 */
int snapshot_proc_sysfs_files(void)
{
2695 2696 2697
	if (DO_BIC(BIC_IRQ))
		if (snapshot_proc_interrupts())
			return 1;
2698

2699
	if (DO_BIC(BIC_GFX_rc6))
L
Len Brown 已提交
2700 2701
		snapshot_gfx_rc6_ms();

2702
	if (DO_BIC(BIC_GFXMHz))
L
Len Brown 已提交
2703 2704
		snapshot_gfx_mhz();

2705 2706 2707 2708 2709 2710
	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

2711 2712 2713
	return 0;
}

2714 2715 2716 2717 2718 2719 2720 2721 2722 2723
int exit_requested;

static void signal_handler (int signal)
{
	switch (signal) {
	case SIGINT:
		exit_requested = 1;
		if (debug)
			fprintf(stderr, " SIGINT\n");
		break;
2724 2725 2726 2727
	case SIGUSR1:
		if (debug > 1)
			fprintf(stderr, "SIGUSR1\n");
		break;
2728
	}
2729 2730
	/* make sure this manually-invoked interval is at least 1ms long */
	nanosleep(&one_msec, NULL);
2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742
}

void setup_signal_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));

	sa.sa_handler = &signal_handler;

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(1, "sigaction SIGINT");
2743 2744
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		err(1, "sigaction SIGUSR1");
2745
}
2746

2747
void do_sleep(void)
2748 2749 2750 2751 2752 2753 2754 2755
{
	struct timeval select_timeout;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

2756 2757 2758 2759
	if (!isatty(fileno(stdin))) {
		nanosleep(&interval_ts, NULL);
		return;
	}
2760

2761
	select_timeout = interval_tv;
2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773
	retval = select(1, &readfds, NULL, NULL, &select_timeout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		}
		/* make sure this manually-invoked interval is at least 1ms long */
		nanosleep(&one_msec, NULL);
	}
}
2774

L
Len Brown 已提交
2775 2776
void turbostat_loop()
{
2777
	int retval;
2778
	int restarted = 0;
2779
	int done_iters = 0;
2780

2781 2782
	setup_signal_handler();

L
Len Brown 已提交
2783
restart:
2784 2785
	restarted++;

2786
	snapshot_proc_sysfs_files();
2787
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2788 2789 2790
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
2791 2792 2793
		if (restarted > 1) {
			exit(retval);
		}
2794 2795 2796
		re_initialize();
		goto restart;
	}
2797
	restarted = 0;
2798
	done_iters = 0;
L
Len Brown 已提交
2799 2800 2801
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
2802
		if (for_all_proc_cpus(cpu_is_not_present)) {
L
Len Brown 已提交
2803 2804 2805
			re_initialize();
			goto restart;
		}
2806
		do_sleep();
2807 2808
		if (snapshot_proc_sysfs_files())
			goto restart;
2809
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
2810 2811 2812
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
2813 2814 2815
			re_initialize();
			goto restart;
		}
L
Len Brown 已提交
2816 2817
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
2818 2819 2820 2821
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
2822 2823
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
2824
		flush_output_stdout();
2825 2826
		if (exit_requested)
			break;
2827 2828
		if (num_iterations && ++done_iters >= num_iterations)
			break;
2829
		do_sleep();
2830 2831
		if (snapshot_proc_sysfs_files())
			goto restart;
2832
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2833 2834 2835
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
L
Len Brown 已提交
2836 2837 2838 2839 2840
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
2841 2842 2843 2844
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
2845 2846
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
2847
		flush_output_stdout();
2848 2849
		if (exit_requested)
			break;
2850 2851
		if (num_iterations && ++done_iters >= num_iterations)
			break;
L
Len Brown 已提交
2852 2853 2854 2855 2856 2857
	}
}

void check_dev_msr()
{
	struct stat sb;
2858
	char pathname[32];
L
Len Brown 已提交
2859

2860 2861
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (stat(pathname, &sb))
2862 2863
 		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
L
Len Brown 已提交
2864 2865
}

2866
void check_permissions()
L
Len Brown 已提交
2867
{
2868 2869 2870 2871 2872 2873
	struct __user_cap_header_struct cap_header_data;
	cap_user_header_t cap_header = &cap_header_data;
	struct __user_cap_data_struct cap_data_data;
	cap_user_data_t cap_data = &cap_data_data;
	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
	int do_exit = 0;
2874
	char pathname[32];
2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888

	/* check for CAP_SYS_RAWIO */
	cap_header->pid = getpid();
	cap_header->version = _LINUX_CAPABILITY_VERSION;
	if (capget(cap_header, cap_data) < 0)
		err(-6, "capget(2) failed");

	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
		do_exit++;
		warnx("capget(CAP_SYS_RAWIO) failed,"
			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
	}

	/* test file permissions */
2889 2890
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (euidaccess(pathname, R_OK)) {
2891 2892 2893 2894 2895 2896 2897
		do_exit++;
		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
	}

	/* if all else fails, thell them to be root */
	if (do_exit)
		if (getuid() != 0)
2898
			warnx("... or simply run as root");
2899 2900 2901

	if (do_exit)
		exit(-6);
L
Len Brown 已提交
2902 2903
}

2904 2905 2906 2907 2908
/*
 * NHM adds support for additional MSRs:
 *
 * MSR_SMI_COUNT                   0x00000034
 *
2909
 * MSR_PLATFORM_INFO               0x000000ce
2910
 * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
2911
 *
2912 2913
 * MSR_MISC_PWR_MGMT               0x000001aa
 *
2914 2915 2916 2917 2918
 * MSR_PKG_C3_RESIDENCY            0x000003f8
 * MSR_PKG_C6_RESIDENCY            0x000003f9
 * MSR_CORE_C3_RESIDENCY           0x000003fc
 * MSR_CORE_C6_RESIDENCY           0x000003fd
 *
2919
 * Side effect:
2920
 * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
2921
 * sets has_misc_feature_control
2922
 */
2923
int probe_nhm_msrs(unsigned int family, unsigned int model)
L
Len Brown 已提交
2924
{
2925
	unsigned long long msr;
2926
	unsigned int base_ratio;
2927 2928
	int *pkg_cstate_limits;

L
Len Brown 已提交
2929 2930 2931 2932 2933 2934
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

2935 2936
	bclk = discover_bclk(family, model);

L
Len Brown 已提交
2937
	switch (model) {
2938 2939
	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
L
Len Brown 已提交
2940
	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
2941 2942 2943 2944
	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
2945 2946
		pkg_cstate_limits = nhm_pkg_cstate_limits;
		break;
2947 2948 2949 2950
	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
2951
		pkg_cstate_limits = snb_pkg_cstate_limits;
2952
		has_misc_feature_control = 1;
2953
		break;
2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965
	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2966
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
2967
		pkg_cstate_limits = hsw_pkg_cstate_limits;
2968
		has_misc_feature_control = 1;
2969
		break;
2970 2971
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
		pkg_cstate_limits = skx_pkg_cstate_limits;
2972
		has_misc_feature_control = 1;
2973
		break;
2974
	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
2975
		no_MSR_MISC_PWR_MGMT = 1;
2976
	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
2977 2978
		pkg_cstate_limits = slv_pkg_cstate_limits;
		break;
2979
	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
2980
		pkg_cstate_limits = amt_pkg_cstate_limits;
2981
		no_MSR_MISC_PWR_MGMT = 1;
2982
		break;
2983
	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
2984
	case INTEL_FAM6_XEON_PHI_KNM:
2985 2986
		pkg_cstate_limits = phi_pkg_cstate_limits;
		break;
2987
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
2988
	case INTEL_FAM6_ATOM_GEMINI_LAKE:
2989
	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
2990 2991
		pkg_cstate_limits = bxt_pkg_cstate_limits;
		break;
L
Len Brown 已提交
2992 2993 2994
	default:
		return 0;
	}
2995
	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2996
	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2997

2998
	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2999 3000 3001 3002
	base_ratio = (msr >> 8) & 0xFF;

	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;
3003
	return 1;
L
Len Brown 已提交
3004
}
3005
/*
 * has_slv_msrs()
 *
 * SLV client has support for unique MSRs:
 *
 * MSR_CC6_DEMOTION_POLICY_CONFIG
 * MSR_MC6_DEMOTION_POLICY_CONFIG
 *
 * Returns 1 for the genuine-Intel family 6 models listed below, else 0.
 */

int has_slv_msrs(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	/* INTEL_FAM6_* model numbers are only meaningful within family 6 */
	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_ATOM_SILVERMONT1:
	case INTEL_FAM6_ATOM_MERRIFIELD:
	case INTEL_FAM6_ATOM_MOOREFIELD:
		return 1;
	}
	return 0;
}
3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036
/*
 * is_dnv()
 * Returns 1 when the CPU is genuine-Intel, family 6, model
 * INTEL_FAM6_ATOM_DENVERTON; 0 otherwise.
 */
int is_dnv(unsigned int family, unsigned int model)
{

	if (!genuine_intel)
		return 0;

	/* INTEL_FAM6_* model numbers are only meaningful within family 6 */
	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_ATOM_DENVERTON:
		return 1;
	}
	return 0;
}
3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049
/*
 * is_bdx()
 * Returns 1 when the CPU is genuine-Intel, family 6, and the model is
 * INTEL_FAM6_BROADWELL_X or INTEL_FAM6_BROADWELL_XEON_D; 0 otherwise.
 */
int is_bdx(unsigned int family, unsigned int model)
{

	if (!genuine_intel)
		return 0;

	/* INTEL_FAM6_* model numbers are only meaningful within family 6 */
	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_BROADWELL_XEON_D:
		return 1;
	}
	return 0;
}
3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061
/*
 * is_skx()
 * Returns 1 when the CPU is genuine-Intel, family 6, model
 * INTEL_FAM6_SKYLAKE_X; 0 otherwise.
 */
int is_skx(unsigned int family, unsigned int model)
{

	if (!genuine_intel)
		return 0;

	/* INTEL_FAM6_* model numbers are only meaningful within family 6 */
	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_SKYLAKE_X:
		return 1;
	}
	return 0;
}
3062

3063
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3064
{
3065 3066 3067
	if (has_slv_msrs(family, model))
		return 0;

3068 3069
	switch (model) {
	/* Nehalem compatible, but do not include turbo-ratio limit support */
3070 3071 3072
	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3073
	case INTEL_FAM6_XEON_PHI_KNM:
3074 3075 3076 3077 3078
		return 0;
	default:
		return 1;
	}
}
3079 3080 3081 3082 3083 3084 3085
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports the model, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3086 3087 3088 3089 3090 3091 3092 3093 3094
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3095 3096
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110
		return 1;
	default:
		return 0;
	}
}
int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3111
	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3112 3113 3114 3115 3116 3117
		return 1;
	default:
		return 0;
	}
}

3118 3119 3120 3121 3122 3123 3124 3125 3126
int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3127
	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3128
	case INTEL_FAM6_XEON_PHI_KNM:
3129 3130 3131 3132 3133
		return 1;
	default:
		return 0;
	}
}
3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149
/*
 * has_glm_turbo_ratio_limit()
 * Returns 1 for genuine-Intel family 6 models INTEL_FAM6_ATOM_GOLDMONT
 * and INTEL_FAM6_SKYLAKE_X, 0 for everything else.
 */
int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel || family != 6)
		return 0;

	if (model == INTEL_FAM6_ATOM_GOLDMONT)
		return 1;
	if (model == INTEL_FAM6_SKYLAKE_X)
		return 1;

	return 0;
}
3150 3151 3152 3153 3154 3155 3156 3157 3158
int has_config_tdp(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3172
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3173 3174 3175
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */

	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3176
	case INTEL_FAM6_XEON_PHI_KNM:
3177 3178 3179 3180 3181 3182
		return 1;
	default:
		return 0;
	}
}

3183
static void
3184
dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196
{
	if (!do_nhm_platform_info)
		return;

	dump_nhm_platform_info();

	if (has_hsw_turbo_ratio_limit(family, model))
		dump_hsw_turbo_ratio_limits();

	if (has_ivt_turbo_ratio_limit(family, model))
		dump_ivt_turbo_ratio_limits();

3197 3198
	if (has_turbo_ratio_limit(family, model))
		dump_turbo_ratio_limits(family, model);
3199

3200 3201 3202
	if (has_atom_turbo_ratio_limit(family, model))
		dump_atom_turbo_ratio_limits();

3203 3204 3205
	if (has_knl_turbo_ratio_limit(family, model))
		dump_knl_turbo_ratio_limits();

3206 3207 3208
	if (has_config_tdp(family, model))
		dump_config_tdp();

3209 3210 3211
	dump_nhm_cst_cfg();
}

3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252
/*
 * dump_sysfs_cstate_config()
 * For cpuidle states 0..9 of base_cpu, read the sysfs "name" and "desc"
 * files and print one "cpuN: <name>: <desc>" line to outf.
 * Does nothing unless BIC_sysfs is enabled.  A state whose files are
 * missing, empty or unreadable is skipped.
 */
static void
dump_sysfs_cstate_config(void)
{
	char path[64];
	char name_buf[16];
	char desc[64];
	FILE *input;
	int state;
	char *sp;
	char *got;

	if (!DO_BIC(BIC_sysfs))
		return;

	for (state = 0; state < 10; ++state) {

		snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		got = fgets(name_buf, sizeof(name_buf), input);
		fclose(input);

		/* nothing was read: name_buf holds no valid string */
		if (got == NULL)
			continue;

		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';

		snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		got = fgets(desc, sizeof(desc), input);
		fclose(input);

		/* nothing was read: desc holds no valid string */
		if (got == NULL)
			continue;

		/* desc normally still carries the newline left in by fgets() */
		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
	}
}
3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300
/*
 * dump_sysfs_pstate_config()
 * Print the cpufreq scaling driver and governor of base_cpu to outf,
 * followed by the global cpufreq "boost" and intel_pstate "no_turbo"
 * settings when those sysfs files exist and contain an integer.
 *
 * Returns early, printing nothing further, when the driver or governor
 * file cannot be opened (a note goes to stderr) or cannot be read.
 */
static void
dump_sysfs_pstate_config(void)
{
	char path[64];
	char driver_buf[64];
	char governor_buf[64];
	FILE *input;
	char *got;
	int turbo;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
			base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(stderr, "NSFOD %s\n", path);
		return;
	}
	got = fgets(driver_buf, sizeof(driver_buf), input);
	fclose(input);
	if (got == NULL)	/* driver_buf holds no valid string */
		return;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
			base_cpu);
	input = fopen(path, "r");
	if (input == NULL) {
		fprintf(stderr, "NSFOD %s\n", path);
		return;
	}
	got = fgets(governor_buf, sizeof(governor_buf), input);
	fclose(input);
	if (got == NULL)	/* governor_buf holds no valid string */
		return;

	/* both buffers normally still carry the newline left in by fgets() */
	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);

	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
	input = fopen(path, "r");
	if (input != NULL) {
		/* print only when an integer was actually parsed */
		if (fscanf(input, "%d", &turbo) == 1)
			fprintf(outf, "cpufreq boost: %d\n", turbo);
		fclose(input);
	}

	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
	input = fopen(path, "r");
	if (input != NULL) {
		if (fscanf(input, "%d", &turbo) == 1)
			fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
		fclose(input);
	}
}
3301

3302

3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322
/*
 * print_epb()
 * Decode the ENERGY_PERF_BIAS MSR
 */
int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	char *epb_string;
	int cpu;

	if (!has_epb)
		return 0;

	cpu = t->cpu_id;

	/* EPB is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
3323
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3324 3325 3326 3327 3328 3329
		return -1;
	}

	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
		return 0;

3330
	switch (msr & 0xF) {
3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343
	case ENERGY_PERF_BIAS_PERFORMANCE:
		epb_string = "performance";
		break;
	case ENERGY_PERF_BIAS_NORMAL:
		epb_string = "balanced";
		break;
	case ENERGY_PERF_BIAS_POWERSAVE:
		epb_string = "powersave";
		break;
	default:
		epb_string = "custom";
		break;
	}
3344
	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3345 3346 3347

	return 0;
}
3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366
/*
 * print_hwp()
 * Decode the MSR_HWP_CAPABILITIES
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
3367
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3368 3369 3370 3371 3372 3373
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

3374
	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3375 3376 3377 3378 3379 3380 3381 3382 3383
		cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

3384
	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3385
			"(high %d guar %d eff %d low %d)\n",
3386 3387 3388 3389 3390 3391 3392 3393 3394
			cpu, msr,
			(unsigned int)HWP_HIGHEST_PERF(msr),
			(unsigned int)HWP_GUARANTEED_PERF(msr),
			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
			(unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

3395
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3396
			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0xff3),
			(unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

3409
		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3410
			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0xff3));
	}
	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

3422
		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3423 3424 3425 3426 3427 3428 3429 3430
			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
			cpu, msr,
			((msr) & 0x1) ? "EN" : "Dis",
			((msr) & 0x2) ? "EN" : "Dis");
	}
	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

3431
	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3432 3433 3434 3435
			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
			cpu, msr,
			((msr) & 0x1) ? "" : "No-",
			((msr) & 0x2) ? "" : "No-");
3436 3437 3438 3439

	return 0;
}

3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454
/*
 * print_perf_limit()
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	cpu = t->cpu_id;

	/* per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
3455
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3456 3457 3458 3459 3460
		return -1;
	}

	if (do_core_perf_limit_reasons) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3461 3462
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3463
			(msr & 1 << 15) ? "bit15, " : "",
3464
			(msr & 1 << 14) ? "bit14, " : "",
3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476
			(msr & 1 << 13) ? "Transitions, " : "",
			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 9) ? "CorePwr, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 5) ? "Auto-HWP, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 2) ? "bit2, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 0) ? "PROCHOT, " : "");
3477
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3478
			(msr & 1 << 31) ? "bit31, " : "",
3479
			(msr & 1 << 30) ? "bit30, " : "",
3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491
			(msr & 1 << 29) ? "Transitions, " : "",
			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 25) ? "CorePwr, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 21) ? "Auto-HWP, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 18) ? "bit18, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 16) ? "PROCHOT, " : "");
3492 3493 3494 3495

	}
	if (do_gfx_perf_limit_reasons) {
		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3496 3497
		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3498 3499 3500 3501 3502 3503 3504 3505
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 9) ? "GFXPwr, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3506
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 25) ? "GFXPwr, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	if (do_ring_perf_limit_reasons) {
		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3518 3519
		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s)",
3520 3521 3522 3523 3524 3525
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3526
		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3527 3528 3529 3530 3531 3532 3533 3534 3535 3536
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	return 0;
}

3537 3538 3539
/* Field masks applied to MSR_PKG_POWER_INFO by get_tdp() and print_rapl() */
#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
#define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */

3540
double get_tdp(unsigned int model)
3541 3542 3543 3544
{
	unsigned long long msr;

	if (do_rapl & RAPL_PKG_POWER_INFO)
3545
		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3546 3547 3548
			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;

	switch (model) {
3549 3550
	case INTEL_FAM6_ATOM_SILVERMONT1:
	case INTEL_FAM6_ATOM_SILVERMONT2:
3551 3552 3553 3554 3555 3556
		return 30.0;
	default:
		return 135.0;
	}
}

3557 3558 3559 3560 3561 3562 3563 3564 3565 3566
/*
 * rapl_dram_energy_units_probe()
 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
 */
static double
rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
{
	/* only called for genuine_intel, family 6 */

	switch (model) {
3567 3568 3569 3570
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3571
	case INTEL_FAM6_XEON_PHI_KNM:
3572 3573 3574 3575 3576 3577
		return (rapl_dram_energy_units = 15.3 / 1000000);
	default:
		return (rapl_energy_units);
	}
}

3578

3579 3580 3581
/*
 * rapl_probe()
 *
3582
 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3583 3584 3585 3586
 */
void rapl_probe(unsigned int family, unsigned int model)
{
	unsigned long long msr;
3587
	unsigned int time_unit;
3588 3589 3590 3591 3592 3593 3594 3595 3596
	double tdp;

	if (!genuine_intel)
		return;

	if (family != 6)
		return;

	switch (model) {
3597 3598 3599 3600 3601 3602 3603
	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_IVYBRIDGE:
	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3604
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3605 3606 3607 3608 3609 3610 3611 3612 3613
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_GFX_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_GFXWatt);
		}
3614
		break;
3615
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3616
	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3617
		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3618 3619 3620 3621
		if (rapl_joules)
			BIC_PRESENT(BIC_Pkg_J);
		else
			BIC_PRESENT(BIC_PkgWatt);
3622
		break;
3623 3624 3625 3626
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3627
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3628
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3629 3630 3631 3632 3633 3634
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
3635
			BIC_PRESENT(BIC_GFX_J);
3636 3637 3638 3639
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
3640
			BIC_PRESENT(BIC_GFXWatt);
3641
		}
3642
		break;
3643 3644 3645 3646 3647
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3648
	case INTEL_FAM6_XEON_PHI_KNM:
3649
		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3650 3651 3652 3653 3654 3655 3656 3657 3658
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
L
Len Brown 已提交
3659
		break;
3660 3661
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE_X:
3662
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
3674
		break;
3675 3676
	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3677
		do_rapl = RAPL_PKG | RAPL_CORES;
3678 3679 3680 3681 3682 3683 3684
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
		}
3685
		break;
3686
	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3687
		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
3699
		break;
3700 3701 3702 3703 3704
	default:
		return;
	}

	/* units on package 0, verify later other packages match */
3705
	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3706 3707 3708
		return;

	rapl_power_units = 1.0 / (1 << (msr & 0xF));
3709
	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3710 3711 3712
		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
	else
		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3713

3714 3715
	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);

3716 3717 3718
	time_unit = msr >> 16 & 0xF;
	if (time_unit == 0)
		time_unit = 0xA;
3719

3720
	rapl_time_units = 1.0 / (1 << (time_unit));
3721

3722
	tdp = get_tdp(model);
3723

3724
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3725
	if (!quiet)
3726
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3727 3728 3729 3730

	return;
}

3731
void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3732 3733 3734 3735 3736 3737 3738 3739
{
	if (!genuine_intel)
		return;

	if (family != 6)
		return;

	switch (model) {
3740 3741 3742
	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3743
		do_gfx_perf_limit_reasons = 1;
3744
	case INTEL_FAM6_HASWELL_X:	/* HSX */
3745 3746 3747 3748 3749 3750 3751
		do_core_perf_limit_reasons = 1;
		do_ring_perf_limit_reasons = 1;
	default:
		return;
	}
}

3752 3753 3754 3755 3756 3757
/*
 * automatic_cstate_conversion_probe()
 * Sets has_automatic_cstate_conversion when is_skx() or is_bdx()
 * accepts this family/model; otherwise leaves it untouched.
 */
void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
{
	if (!is_skx(family, model) && !is_bdx(family, model))
		return;

	has_automatic_cstate_conversion = 1;
}

3758 3759 3760
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
3761
	unsigned int dts, dts2;
3762 3763 3764 3765 3766 3767 3768 3769
	int cpu;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
3770
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3771 3772 3773
		return 0;

	if (cpu_migrate(cpu)) {
3774
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3775 3776 3777 3778 3779 3780 3781 3782
		return -1;
	}

	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
3783
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3784 3785 3786 3787 3788 3789 3790
			cpu, msr, tcc_activation_temp - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
3791
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3792 3793 3794 3795
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
	}


3796
	if (do_dts && debug) {
3797 3798 3799 3800 3801 3802 3803
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;
3804
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3805 3806 3807 3808 3809 3810 3811
			cpu, msr, tcc_activation_temp - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
3812
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3813 3814 3815 3816 3817
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
	}

	return 0;
}
3818

3819 3820
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
3821
	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844
		cpu, label,
		((msr >> 15) & 1) ? "EN" : "DIS",
		((msr >> 0) & 0x7FFF) * rapl_power_units,
		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
		(((msr >> 16) & 1) ? "EN" : "DIS"));

	return;
}

int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	if (!do_rapl)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
3845
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3846 3847 3848 3849 3850 3851
		return -1;
	}

	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
		return -1;

3852 3853 3854
	fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
		rapl_power_units, rapl_energy_units, rapl_time_units);

3855 3856
	if (do_rapl & RAPL_PKG_POWER_INFO) {

3857 3858 3859 3860
		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                	return -5;


3861
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3862 3863 3864 3865 3866 3867
			cpu, msr,
			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

3868 3869 3870
	}
	if (do_rapl & RAPL_PKG) {

3871 3872 3873
		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

3874
		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3875
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3876 3877

		print_power_limit_msr(cpu, msr, "PKG Limit #1");
3878
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3879 3880 3881 3882 3883 3884 3885
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");
	}

3886
	if (do_rapl & RAPL_DRAM_POWER_INFO) {
3887 3888 3889
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                	return -6;

3890
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3891 3892 3893 3894 3895
			cpu, msr,
			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3896 3897
	}
	if (do_rapl & RAPL_DRAM) {
3898 3899
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
3900
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3901
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3902 3903 3904

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
3905
	if (do_rapl & RAPL_CORE_POLICY) {
3906 3907
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;
3908

3909
		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3910
	}
3911
	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3912 3913 3914 3915 3916
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
3917 3918
	}
	if (do_rapl & RAPL_GFX) {
3919 3920
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;
3921

3922
		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3923

3924 3925 3926 3927 3928
		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
3929 3930 3931 3932
	}
	return 0;
}

3933 3934 3935 3936 3937 3938 3939
/*
 * SNB adds support for additional MSRs:
 *
 * MSR_PKG_C7_RESIDENCY            0x000003fa
 * MSR_CORE_C7_RESIDENCY           0x000003fe
 * MSR_PKG_C2_RESIDENCY            0x0000060d
 */
L
Len Brown 已提交
3940

3941
int has_snb_msrs(unsigned int family, unsigned int model)
L
Len Brown 已提交
3942 3943 3944 3945 3946
{
	if (!genuine_intel)
		return 0;

	switch (model) {
3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962
	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_X:	/* HSW */
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3963
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3964 3965
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3966
	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3967
	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
L
Len Brown 已提交
3968 3969 3970 3971 3972
		return 1;
	}
	return 0;
}

3973 3974 3975
/*
 * HSW adds support for additional MSRs:
 *
3976 3977 3978 3979 3980 3981 3982 3983
 * MSR_PKG_C8_RESIDENCY		0x00000630
 * MSR_PKG_C9_RESIDENCY		0x00000631
 * MSR_PKG_C10_RESIDENCY	0x00000632
 *
 * MSR_PKGC8_IRTL		0x00000633
 * MSR_PKGC9_IRTL		0x00000634
 * MSR_PKGC10_IRTL		0x00000635
 *
3984 3985
 */
int has_hsw_msrs(unsigned int family, unsigned int model)
3986 3987 3988 3989 3990
{
	if (!genuine_intel)
		return 0;

	switch (model) {
3991 3992 3993 3994 3995 3996
	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3997
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3998
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3999
	case INTEL_FAM6_ATOM_GEMINI_LAKE:
4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018
		return 1;
	}
	return 0;
}

/*
 * SKL adds support for additional MSRS:
 *
 * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
 * MSR_PKG_ANY_CORE_C0_RES         0x00000659
 * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
 * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
 */
int has_skl_msrs(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
4019 4020 4021 4022
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4023
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4024 4025 4026 4027 4028
		return 1;
	}
	return 0;
}

4029 4030 4031 4032 4033
int is_slm(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;
	switch (model) {
4034 4035
	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
4036 4037 4038 4039 4040
		return 1;
	}
	return 0;
}

4041 4042 4043 4044 4045
int is_knl(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;
	switch (model) {
4046
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4047
	case INTEL_FAM6_XEON_PHI_KNM:
4048 4049 4050 4051 4052
		return 1;
	}
	return 0;
}

4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065
/*
 * is_cnl()
 *
 * Returns 1 when the running CPU is a genuine Intel family-6 part with the
 * Cannon Lake mobile model number, 0 otherwise.  Non-family-6 CPUs are
 * rejected because the INTEL_FAM6_* model numbers only identify family-6
 * parts.
 */
int is_cnl(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
		return 1;
	}

	return 0;
}

4066 4067 4068 4069 4070 4071 4072
/*
 * get_aperf_mperf_multiplier()
 *
 * Returns 1024 when is_knl() accepts the family/model pair, otherwise 1.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}

4073 4074 4075 4076 4077 4078 4079 4080 4081
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};

double slm_bclk(void)
{
	unsigned long long msr = 3;
	unsigned int i;
	double freq;

4082
	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4083
		fprintf(outf, "SLM BCLK: unknown\n");
4084 4085 4086

	i = msr & 0xf;
	if (i >= SLM_BCLK_FREQS) {
4087
		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4088
		i = 3;
4089 4090 4091
	}
	freq = slm_freq_table[i];

4092
	if (!quiet)
4093
		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4094 4095 4096 4097

	return freq;
}

L
Len Brown 已提交
4098 4099
/*
 * discover_bclk()
 *
 * Pick the bus clock for the given family/model:
 *   100.00  when has_snb_msrs() or is_knl() matches,
 *   the value returned by slm_bclk() when is_slm() matches,
 *   133.33  otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model))
		return 100.00;

	if (is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}

4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135
/*
 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
 * the Thermal Control Circuit (TCC) activates.
 * This is usually equal to tjMax.
 *
 * Older processors do not have this MSR, so there we guess,
 * but also allow cmdline over-ride with -T.
 *
 * Several MSR temperature values are in units of degrees-C
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int target_c_local;
	int cpu;

	/* tcc_activation_temp is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
4136
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4137 4138 4139 4140 4141
		return -1;
	}

	if (tcc_activation_temp_override != 0) {
		tcc_activation_temp = tcc_activation_temp_override;
4142
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4143 4144 4145 4146 4147
			cpu, tcc_activation_temp);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
4148
	if (!do_nhm_platform_info)
4149 4150
		goto guess;

4151
	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4152 4153
		goto guess;

4154
	target_c_local = (msr >> 16) & 0xFF;
4155

4156
	if (!quiet)
4157
		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4158 4159
			cpu, msr, target_c_local);

4160
	if (!target_c_local)
4161 4162 4163 4164 4165 4166 4167 4168
		goto guess;

	tcc_activation_temp = target_c_local;

	return 0;

guess:
	tcc_activation_temp = TJMAX_DEFAULT;
4169
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4170 4171 4172 4173
		cpu, tcc_activation_temp);

	return 0;
}
4174

4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185
/*
 * decode_feature_control_msr()
 *
 * Read MSR_IA32_FEATURE_CONTROL on base_cpu and print its raw value plus
 * the state of the FEATURE_CONTROL_LOCKED flag and of bit 18 (shown as
 * "SGX").  Prints nothing if the MSR read fails.
 */
void decode_feature_control_msr(void)
{
	unsigned long long msr;
	const char *lock_prefix;
	const char *sgx_tag;

	if (get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
		return;

	lock_prefix = (msr & FEATURE_CONTROL_LOCKED) ? "" : "UN-";
	sgx_tag = (msr & (1 << 18)) ? "SGX" : "";

	fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
		base_cpu, msr, lock_prefix, sgx_tag);
}

4186 4187 4188 4189
void decode_misc_enable_msr(void)
{
	unsigned long long msr;

4190 4191 4192
	if (!genuine_intel)
		return;

4193
	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4194
		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4195
			base_cpu, msr,
4196 4197
			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4198
			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4199 4200
			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4201 4202
}

4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217
/*
 * decode_misc_feature_control()
 *
 * When has_misc_feature_control is set, read MSR_MISC_FEATURE_CONTROL on
 * base_cpu and print its raw value followed by the state of the four
 * prefetcher-disable bits.  Each bit set means the corresponding prefetcher
 * is disabled and is reported with a "No-" prefix:
 *
 *   bit 0  L2 hardware prefetcher
 *   bit 1  L2 adjacent cache line (pair) prefetcher
 *   bit 2  L1 (DCU) hardware prefetcher
 *   bit 3  L1 (DCU) IP prefetcher
 *
 * Each field is tested with its own single-bit mask.
 * Prints nothing if the MSR read fails.
 */
void decode_misc_feature_control(void)
{
	unsigned long long msr;

	if (!has_misc_feature_control)
		return;

	if (get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
		return;

	fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
		base_cpu, msr,
		msr & (1 << 0) ? "No-" : "",
		msr & (1 << 1) ? "No-" : "",
		msr & (1 << 2) ? "No-" : "",
		msr & (1 << 3) ? "No-" : "");
}
4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231
/*
 * Decode MSR_MISC_PWR_MGMT
 *
 * Decode the bits according to the Nehalem documentation
 * bit[0] seems to continue to have same meaning going forward
 * bit[1] less so...
 */
void decode_misc_pwr_mgmt_msr(void)
{
	unsigned long long msr;

	if (!do_nhm_platform_info)
		return;

4232 4233 4234
	if (no_MSR_MISC_PWR_MGMT)
		return;

4235
	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4236
		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4237 4238
			base_cpu, msr,
			msr & (1 << 0) ? "DIS" : "EN",
4239 4240
			msr & (1 << 1) ? "EN" : "DIS",
			msr & (1 << 8) ? "EN" : "DIS");
4241
}
4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259
/*
 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
 *
 * These MSRs are present on Silvermont processors,
 * Intel Atom processor E3000 series (Baytrail), and friends.
 *
 * Each MSR is read on base_cpu and printed independently; bit 0 selects
 * "EN" or "DIS".  A failed read silently skips that MSR.
 */
void decode_c6_demotion_policy_msr(void)
{
	unsigned long long msr;
	int cc6_failed, mc6_failed;

	cc6_failed = get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr);
	if (!cc6_failed) {
		const char *state = (msr & (1 << 0)) ? "EN" : "DIS";

		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
			base_cpu, msr, state);
	}

	mc6_failed = get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr);
	if (!mc6_failed) {
		const char *state = (msr & (1 << 0)) ? "EN" : "DIS";

		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
			base_cpu, msr, state);
	}
}
4260

4261
void process_cpuid()
L
Len Brown 已提交
4262
{
4263
	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
L
Len Brown 已提交
4264
	unsigned int fms, family, model, stepping;
4265
	unsigned int has_turbo;
L
Len Brown 已提交
4266 4267 4268

	eax = ebx = ecx = edx = 0;

4269
	__cpuid(0, max_level, ebx, ecx, edx);
L
Len Brown 已提交
4270 4271 4272 4273

	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
		genuine_intel = 1;

4274
	if (!quiet)
4275
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
L
Len Brown 已提交
4276 4277
			(char *)&ebx, (char *)&edx, (char *)&ecx);

4278
	__cpuid(1, fms, ebx, ecx, edx);
L
Len Brown 已提交
4279 4280 4281 4282 4283 4284
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	if (family == 6 || family == 0xf)
		model += ((fms >> 16) & 0xf) << 4;

4285
	if (!quiet) {
4286
		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
L
Len Brown 已提交
4287
			max_level, family, model, stepping, family, model, stepping);
4288
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
4289 4290
			ecx & (1 << 0) ? "SSE3" : "-",
			ecx & (1 << 3) ? "MONITOR" : "-",
4291
			ecx & (1 << 6) ? "SMX" : "-",
4292 4293 4294 4295 4296 4297 4298
			ecx & (1 << 7) ? "EIST" : "-",
			ecx & (1 << 8) ? "TM2" : "-",
			edx & (1 << 4) ? "TSC" : "-",
			edx & (1 << 5) ? "MSR" : "-",
			edx & (1 << 22) ? "ACPI-TM" : "-",
			edx & (1 << 29) ? "TM" : "-");
	}
L
Len Brown 已提交
4299

4300 4301
	if (!(edx & (1 << 5)))
		errx(1, "CPUID: no MSR");
L
Len Brown 已提交
4302 4303 4304 4305 4306 4307 4308

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
4309
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
L
Len Brown 已提交
4310

4311
	if (max_extended_level >= 0x80000007) {
L
Len Brown 已提交
4312

4313 4314 4315 4316
		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
4317
		__cpuid(0x80000007, eax, ebx, ecx, edx);
4318 4319
		has_invariant_tsc = edx & (1 << 8);
	}
L
Len Brown 已提交
4320 4321 4322 4323 4324 4325

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

4326
	__cpuid(0x6, eax, ebx, ecx, edx);
4327
	has_aperf = ecx & (1 << 0);
4328 4329 4330 4331 4332
	if (has_aperf) {
		BIC_PRESENT(BIC_Avg_MHz);
		BIC_PRESENT(BIC_Busy);
		BIC_PRESENT(BIC_Bzy_MHz);
	}
4333
	do_dts = eax & (1 << 0);
4334 4335
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
4336
	has_turbo = eax & (1 << 1);
4337
	do_ptm = eax & (1 << 6);
4338 4339
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
4340 4341 4342 4343 4344
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
4345 4346
	has_epb = ecx & (1 << 3);

4347
	if (!quiet)
4348
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4349 4350
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
4351
			has_turbo ? "" : "No-",
4352 4353 4354 4355 4356 4357 4358 4359
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-",
			has_hwp_pkg ? "" : "No-",
			has_epb ? "" : "No-");
L
Len Brown 已提交
4360

4361
	if (!quiet)
4362 4363
		decode_misc_enable_msr();

4364

4365
	if (max_level >= 0x7 && !quiet) {
4366
		int has_sgx;
L
Len Brown 已提交
4367

4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378
		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);
		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

4379
	if (max_level >= 0x15) {
4380 4381 4382 4383 4384 4385 4386
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4387
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4388 4389 4390

		if (ebx_tsc != 0) {

4391
			if (!quiet && (ebx != 0))
4392
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4393 4394 4395 4396
					eax_crystal, ebx_tsc, crystal_hz);

			if (crystal_hz == 0)
				switch(model) {
4397 4398 4399 4400
				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
				case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
				case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4401 4402
					crystal_hz = 24000000;	/* 24.0 MHz */
					break;
4403
				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
4404 4405
					crystal_hz = 25000000;	/* 25.0 MHz */
					break;
4406
				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4407
				case INTEL_FAM6_ATOM_GEMINI_LAKE:
4408
					crystal_hz = 19200000;	/* 19.2 MHz */
4409 4410 4411 4412 4413 4414 4415
					break;
				default:
					crystal_hz = 0;
			}

			if (crystal_hz) {
				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4416
				if (!quiet)
4417
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4418 4419 4420 4421
						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
			}
		}
	}
4422 4423 4424 4425 4426 4427 4428 4429
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

4430
		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4431
		if (!quiet)
4432
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4433 4434
				base_mhz, max_mhz, bus_mhz);
	}
4435

4436 4437 4438
	if (has_aperf)
		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);

4439 4440 4441 4442 4443 4444 4445 4446 4447 4448
	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_TSC_MHz);

	if (probe_nhm_msrs(family, model)) {
		do_nhm_platform_info = 1;
		BIC_PRESENT(BIC_CPU_c1);
		BIC_PRESENT(BIC_CPU_c3);
		BIC_PRESENT(BIC_CPU_c6);
		BIC_PRESENT(BIC_SMI);
	}
4449
	do_snb_cstates = has_snb_msrs(family, model);
4450 4451 4452 4453

	if (do_snb_cstates)
		BIC_PRESENT(BIC_CPU_c7);

4454
	do_irtl_snb = has_snb_msrs(family, model);
4455 4456 4457 4458 4459 4460 4461 4462
	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
		BIC_PRESENT(BIC_Pkgpc2);
	if (pkg_cstate_limit >= PCL__3)
		BIC_PRESENT(BIC_Pkgpc3);
	if (pkg_cstate_limit >= PCL__6)
		BIC_PRESENT(BIC_Pkgpc6);
	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
		BIC_PRESENT(BIC_Pkgpc7);
4463
	if (has_slv_msrs(family, model)) {
4464 4465 4466 4467
		BIC_NOT_PRESENT(BIC_Pkgpc2);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_PRESENT(BIC_Pkgpc6);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
4468 4469 4470
		BIC_PRESENT(BIC_Mod_c6);
		use_c1_residency_msr = 1;
	}
4471 4472 4473 4474 4475 4476 4477 4478
	if (is_dnv(family, model)) {
		BIC_PRESENT(BIC_CPU_c1);
		BIC_NOT_PRESENT(BIC_CPU_c3);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
		use_c1_residency_msr = 1;
	}
4479 4480 4481 4482 4483 4484
	if (is_skx(family, model)) {
		BIC_NOT_PRESENT(BIC_CPU_c3);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
4485 4486 4487 4488
	if (is_bdx(family, model)) {
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
4489 4490 4491 4492 4493
	if (has_hsw_msrs(family, model)) {
		BIC_PRESENT(BIC_Pkgpc8);
		BIC_PRESENT(BIC_Pkgpc9);
		BIC_PRESENT(BIC_Pkgpc10);
	}
4494
	do_irtl_hsw = has_hsw_msrs(family, model);
4495 4496 4497 4498 4499 4500
	if (has_skl_msrs(family, model)) {
		BIC_PRESENT(BIC_Totl_c0);
		BIC_PRESENT(BIC_Any_c0);
		BIC_PRESENT(BIC_GFX_c0);
		BIC_PRESENT(BIC_CPUGFX);
	}
4501
	do_slm_cstates = is_slm(family, model);
4502
	do_knl_cstates  = is_knl(family, model);
4503
	do_cnl_cstates = is_cnl(family, model);
L
Len Brown 已提交
4504

4505
	if (!quiet)
4506 4507
		decode_misc_pwr_mgmt_msr();

4508
	if (!quiet && has_slv_msrs(family, model))
4509 4510
		decode_c6_demotion_policy_msr();

4511
	rapl_probe(family, model);
4512
	perf_limit_reasons_probe(family, model);
4513
	automatic_cstate_conversion_probe(family, model);
4514

4515
	if (!quiet)
4516
		dump_cstate_pstate_config_info(family, model);
4517

4518 4519
	if (!quiet)
		dump_sysfs_cstate_config();
4520 4521
	if (!quiet)
		dump_sysfs_pstate_config();
4522

4523 4524 4525
	if (has_skl_msrs(family, model))
		calculate_tsc_tweak();

4526 4527
	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
		BIC_PRESENT(BIC_GFX_rc6);
L
Len Brown 已提交
4528

4529 4530
	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
		BIC_PRESENT(BIC_GFXMHz);
L
Len Brown 已提交
4531

4532 4533 4534 4535 4536 4537 4538 4539 4540 4541
	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
		BIC_PRESENT(BIC_CPU_LPI);
	else
		BIC_NOT_PRESENT(BIC_CPU_LPI);

	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
		BIC_PRESENT(BIC_SYS_LPI);
	else
		BIC_NOT_PRESENT(BIC_SYS_LPI);

4542
	if (!quiet)
4543 4544
		decode_misc_feature_control();

4545
	return;
L
Len Brown 已提交
4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565
}


/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of dirp->d_name is a decimal digit,
 * 0 otherwise.  The character is converted to unsigned char first because
 * passing a negative plain char to isdigit() is undefined.
 */
int dir_filter(const struct dirent *dirp)
{
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}

/*
 * open_dev_cpu_msr()
 *
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	int result = 0;

	(void)dummy1;
	return result;
}

4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577
void topology_probe()
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_siblings = 0;
	struct cpu_topology {
		int core_id;
		int physical_package_id;
	} *cpus;

	/* Initialize num_cpus, max_cpu_num */
4578
	set_max_cpu_num();
4579 4580 4581
	topo.num_cpus = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only && topo.num_cpus > 1)
4582
		BIC_PRESENT(BIC_CPU);
4583

4584
	if (debug > 1)
4585
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4586 4587

	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4588 4589
	if (cpus == NULL)
		err(1, "calloc cpus");
4590 4591 4592 4593 4594

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4595 4596
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
4597 4598 4599 4600
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

4601 4602 4603 4604 4605 4606 4607 4608 4609
	/*
	 * Validate that all cpus in cpu_subset are also in cpu_present_set
	 */
	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
				err(1, "cpu%d not present", i);
	}

4610 4611 4612 4613
	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4614 4615
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);


	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
4628
			if (debug > 1)
4629
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642
			continue;
		}
		cpus[i].core_id = get_core_id(i);
		if (cpus[i].core_id > max_core_id)
			max_core_id = cpus[i].core_id;

		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		siblings = get_num_ht_siblings(i);
		if (siblings > max_siblings)
			max_siblings = siblings;
4643
		if (debug > 1)
4644
			fprintf(outf, "cpu %d pkg %d core %d\n",
4645 4646 4647
				i, cpus[i].physical_package_id, cpus[i].core_id);
	}
	topo.num_cores_per_pkg = max_core_id + 1;
4648
	if (debug > 1)
4649
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4650
			max_core_id, topo.num_cores_per_pkg);
4651
	if (!summary_only && topo.num_cores_per_pkg > 1)
4652
		BIC_PRESENT(BIC_Core);
4653 4654

	topo.num_packages = max_package_id + 1;
4655
	if (debug > 1)
4656
		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4657
			max_package_id, topo.num_packages);
4658
	if (!summary_only && topo.num_packages > 1)
4659
		BIC_PRESENT(BIC_Package);
4660 4661

	topo.num_threads_per_core = max_siblings;
4662
	if (debug > 1)
4663
		fprintf(outf, "max_siblings %d\n", max_siblings);
4664 4665 4666 4667 4668 4669 4670 4671 4672 4673

	free(cpus);
}

void
allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
{
	int i;

	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
4674
		topo.num_packages, sizeof(struct thread_data));
4675 4676 4677 4678 4679 4680 4681 4682
	if (*t == NULL)
		goto error;

	for (i = 0; i < topo.num_threads_per_core *
		topo.num_cores_per_pkg * topo.num_packages; i++)
		(*t)[i].cpu_id = -1;

	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
4683
		sizeof(struct core_data));
4684 4685 4686 4687 4688 4689
	if (*c == NULL)
		goto error;

	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
		(*c)[i].core_id = -1;

4690
	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
4691 4692 4693 4694 4695 4696 4697 4698
	if (*p == NULL)
		goto error;

	for (i = 0; i < topo.num_packages; i++)
		(*p)[i].package_id = i;

	return;
error:
4699
	err(1, "calloc counters");
4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738
}
/*
 * init_counter()
 *
 * Locate the thread, core and package slots addressed by
 * (thread_num, core_num, pkg_num) in the given counter arrays and record
 * their identity:
 *   - thread: cpu_id
 *   - core:   core_id = core_num
 *   - pkg:    package_id = pkg_num
 *
 * Thread 0 of a core is flagged CPU_IS_FIRST_THREAD_IN_CORE, and also
 * CPU_IS_FIRST_CORE_IN_PACKAGE when cpu_is_first_core_in_package(cpu_id)
 * says so.
 */
void init_counter(struct thread_data *thread_base, struct core_data *core_base,
	struct pkg_data *pkg_base, int thread_num, int core_num,
	int pkg_num, int cpu_id)
{
	struct thread_data *thread;
	struct core_data *core;
	struct pkg_data *pkg;

	thread = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
	core = GET_CORE(core_base, core_num, pkg_num);
	pkg = GET_PKG(pkg_base, pkg_num);

	thread->cpu_id = cpu_id;

	if (thread_num == 0) {
		thread->flags |= CPU_IS_FIRST_THREAD_IN_CORE;

		if (cpu_is_first_core_in_package(cpu_id))
			thread->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
	}

	core->core_id = core_num;
	pkg->package_id = pkg_num;
}


int initialize_counters(int cpu_id)
{
	int my_thread_id, my_core_id, my_package_id;

	my_package_id = get_physical_package_id(cpu_id);
	my_core_id = get_core_id(cpu_id);
4739 4740
	my_thread_id = get_cpu_position_in_core(cpu_id);
	if (!my_thread_id)
4741 4742 4743 4744 4745 4746 4747 4748 4749
		topo.num_cores++;

	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
	return 0;
}

void allocate_output_buffer()
{
4750
	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4751
	outp = output_buffer;
4752 4753
	if (outp == NULL)
		err(-1, "calloc output buffer");
4754
}
4755 4756
void allocate_fd_percpu(void)
{
4757
	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4758 4759 4760
	if (fd_percpu == NULL)
		err(-1, "calloc fd_percpu");
}
4761 4762 4763 4764 4765
void allocate_irq_buffers(void)
{
	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
	if (irq_column_2_cpu == NULL)
		err(-1, "calloc %d", topo.num_cpus);
4766

4767
	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4768
	if (irqs_per_cpu == NULL)
4769
		err(-1, "calloc %d", topo.max_cpu_num + 1);
4770
}
4771 4772 4773
void setup_all_buffers(void)
{
	topology_probe();
4774
	allocate_irq_buffers();
4775
	allocate_fd_percpu();
4776 4777 4778 4779 4780
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
}
4781

4782 4783 4784 4785 4786 4787 4788
void set_base_cpu(void)
{
	base_cpu = sched_getcpu();
	if (base_cpu < 0)
		err(-ENODEV, "No valid cpus found");

	if (debug > 1)
4789
		fprintf(outf, "base_cpu = %d\n", base_cpu);
4790 4791
}

L
Len Brown 已提交
4792 4793
void turbostat_init()
{
4794 4795
	setup_all_buffers();
	set_base_cpu();
L
Len Brown 已提交
4796
	check_dev_msr();
4797
	check_permissions();
4798
	process_cpuid();
L
Len Brown 已提交
4799 4800


4801
	if (!quiet)
4802 4803
		for_all_cpus(print_hwp, ODD_COUNTERS);

4804
	if (!quiet)
4805 4806
		for_all_cpus(print_epb, ODD_COUNTERS);

4807
	if (!quiet)
4808 4809
		for_all_cpus(print_perf_limit, ODD_COUNTERS);

4810
	if (!quiet)
4811 4812 4813 4814
		for_all_cpus(print_rapl, ODD_COUNTERS);

	for_all_cpus(set_temperature_target, ODD_COUNTERS);

4815
	if (!quiet)
4816
		for_all_cpus(print_thermal, ODD_COUNTERS);
4817

4818
	if (!quiet && do_irtl_snb)
4819
		print_irtl();
L
Len Brown 已提交
4820 4821 4822 4823 4824
}

int fork_it(char **argv)
{
	pid_t child_pid;
4825
	int status;
4826

4827
	snapshot_proc_sysfs_files();
4828 4829 4830
	status = for_all_cpus(get_counters, EVEN_COUNTERS);
	if (status)
		exit(status);
4831 4832
	/* clear affinity side-effect of get_counters() */
	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
L
Len Brown 已提交
4833 4834 4835 4836 4837 4838
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
4839
		err(errno, "exec %s", argv[0]);
L
Len Brown 已提交
4840 4841 4842
	} else {

		/* parent */
4843 4844
		if (child_pid == -1)
			err(1, "fork");
L
Len Brown 已提交
4845 4846 4847

		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
4848 4849
		if (waitpid(child_pid, &status, 0) == -1)
			err(status, "waitpid");
L
Len Brown 已提交
4850
	}
4851 4852 4853 4854
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking
	 */
4855
	snapshot_proc_sysfs_files();
4856
	for_all_cpus(get_counters, ODD_COUNTERS);
L
Len Brown 已提交
4857 4858
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
4859 4860 4861 4862 4863 4864
	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
		fprintf(outf, "%s: Counter reset detected\n", progname);
	else {
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
	}
L
Len Brown 已提交
4865

4866 4867 4868
	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);

	flush_output_stderr();
L
Len Brown 已提交
4869

4870
	return status;
L
Len Brown 已提交
4871 4872
}

4873 4874 4875 4876
int get_and_dump_counters(void)
{
	int status;

4877
	snapshot_proc_sysfs_files();
4878 4879 4880 4881 4882 4883 4884 4885
	status = for_all_cpus(get_counters, ODD_COUNTERS);
	if (status)
		return status;

	status = for_all_cpus(dump_counters, ODD_COUNTERS);
	if (status)
		return status;

4886
	flush_output_stdout();
4887 4888 4889 4890

	return status;
}

4891
void print_version() {
4892
	fprintf(outf, "turbostat version 17.06.23"
4893 4894 4895
		" - Len Brown <lenb@kernel.org>\n");
}

4896 4897
int add_counter(unsigned int msr_num, char *path, char *name,
	unsigned int width, enum counter_scope scope,
4898
	enum counter_type type, enum counter_format format, int flags)
4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909
{
	struct msr_counter *msrp;

	msrp = calloc(1, sizeof(struct msr_counter));
	if (msrp == NULL) {
		perror("calloc");
		exit(1);
	}

	msrp->msr_num = msr_num;
	strncpy(msrp->name, name, NAME_BYTES);
4910 4911
	if (path)
		strncpy(msrp->path, path, PATH_BYTES);
4912 4913 4914
	msrp->width = width;
	msrp->type = type;
	msrp->format = format;
4915
	msrp->flags = flags;
4916 4917 4918 4919 4920 4921

	switch (scope) {

	case SCOPE_CPU:
		msrp->next = sys.tp;
		sys.tp = msrp;
4922
		sys.added_thread_counters++;
4923
		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
4924 4925 4926 4927
			fprintf(stderr, "exceeded max %d added thread counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
4928 4929 4930 4931 4932
		break;

	case SCOPE_CORE:
		msrp->next = sys.cp;
		sys.cp = msrp;
4933 4934 4935 4936 4937 4938
		sys.added_core_counters++;
		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
			fprintf(stderr, "exceeded max %d added core counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
4939 4940 4941 4942 4943
		break;

	case SCOPE_PACKAGE:
		msrp->next = sys.pp;
		sys.pp = msrp;
4944 4945 4946 4947 4948 4949
		sys.added_package_counters++;
		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
			fprintf(stderr, "exceeded max %d added package counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
4950 4951 4952 4953 4954 4955 4956 4957 4958
		break;
	}

	return 0;
}

void parse_add_command(char *add_command)
{
	int msr_num = 0;
4959
	char *path = NULL;
4960
	char name_buffer[NAME_BYTES] = "";
4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974
	int width = 64;
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	while (add_command) {

		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

4975 4976 4977 4978 4979
		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004
		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			width = 64;
		}
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
5005 5006 5007 5008
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		add_command = strchr(add_command, ',');
5033 5034
		if (add_command) {
			*add_command = '\0';
5035
			add_command++;
5036
		}
5037 5038

	}
5039 5040
	if ((msr_num == 0) && (path == NULL)) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5041 5042 5043 5044 5045
		fail++;
	}

	/* generate default column header */
	if (*name_buffer == '\0') {
5046 5047 5048 5049
		if (width == 32)
			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		else
			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5050 5051
	}

5052
	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5053 5054 5055 5056 5057 5058 5059
		fail++;

	if (fail) {
		help();
		exit(1);
	}
}
5060

5061 5062 5063 5064 5065 5066 5067 5068 5069 5070
/*
 * is_deferred_skip() - look @name up in deferred_skip_names[]
 * @name: string to look for
 *
 * Only the first deferred_skip_index entries are examined, and the
 * comparison is an exact strcmp() match.
 *
 * Returns 1 when @name is found, 0 otherwise.
 */
int is_deferred_skip(char *name)
{
	int idx = 0;

	while (idx < deferred_skip_index) {
		if (strcmp(name, deferred_skip_names[idx]) == 0)
			return 1;
		idx++;
	}

	return 0;
}

5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081
void probe_sysfs(void)
{
	char path[64];
	char name_buf[16];
	FILE *input;
	int state;
	char *sp;

	if (!DO_BIC(BIC_sysfs))
		return;

5082
	for (state = 10; state >= 0; --state) {
5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		fgets(name_buf, sizeof(name_buf), input);

		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '%';
		*(sp + 1) = '\0';

		fclose(input);

		sprintf(path, "cpuidle/state%d/time", state);

5102 5103 5104
		if (is_deferred_skip(name_buf))
			continue;

5105 5106 5107 5108
		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
				FORMAT_PERCENT, SYSFS_PERCPU);
	}

5109
	for (state = 10; state >= 0; --state) {
5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		fgets(name_buf, sizeof(name_buf), input);
		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		sprintf(path, "cpuidle/state%d/usage", state);

5126 5127 5128
		if (is_deferred_skip(name_buf))
			continue;

5129 5130 5131 5132 5133 5134
		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
				FORMAT_DELTA, SYSFS_PERCPU);
	}

}

5135 5136 5137 5138 5139 5140 5141 5142 5143 5144

/*
 * parse cpuset with following syntax
 * 1,2,4..6,8-10 and set bits in cpu_subset
 */
void parse_cpu_command(char *optarg)
{
	unsigned int start, end;
	char *next;

5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159
	if (!strcmp(optarg, "core")) {
		if (cpu_subset)
			goto error;
		show_core_only++;
		return;
	}
	if (!strcmp(optarg, "package")) {
		if (cpu_subset)
			goto error;
		show_pkg_only++;
		return;
	}
	if (show_core_only || show_pkg_only)
		goto error;

5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216
	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
	if (cpu_subset == NULL)
		err(3, "CPU_ALLOC");
	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);

	CPU_ZERO_S(cpu_subset_size, cpu_subset);

	next = optarg;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			goto error;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			goto error;
		CPU_SET_S(start, cpu_subset_size, cpu_subset);

		if (*next == '\0')
			break;

		if (*next == ',') {
			next += 1;
			continue;
		}

		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				goto error;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)
			goto error;

		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				goto error;
			CPU_SET_S(start, cpu_subset_size, cpu_subset);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0')
			goto error;
	}

	return;

error:
5217 5218
	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
	help();
5219 5220 5221
	exit(-1);
}

5222

L
Len Brown 已提交
5223 5224 5225
void cmdline(int argc, char **argv)
{
	int opt;
5226 5227
	int option_index = 0;
	static struct option long_options[] = {
5228
		{"add",		required_argument,	0, 'a'},
5229
		{"cpu",		required_argument,	0, 'c'},
5230
		{"Dump",	no_argument,		0, 'D'},
5231
		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
5232
		{"enable",	required_argument,	0, 'e'},
5233
		{"interval",	required_argument,	0, 'i'},
5234
		{"num_iterations",	required_argument,	0, 'n'},
5235
		{"help",	no_argument,		0, 'h'},
5236
		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
5237
		{"Joules",	no_argument,		0, 'J'},
5238
		{"list",	no_argument,		0, 'l'},
5239
		{"out",		required_argument,	0, 'o'},
5240
		{"quiet",	no_argument,		0, 'q'},
5241
		{"show",	required_argument,	0, 's'},
5242 5243 5244 5245 5246
		{"Summary",	no_argument,		0, 'S'},
		{"TCC",		required_argument,	0, 'T'},
		{"version",	no_argument,		0, 'v' },
		{0,		0,			0,  0 }
	};
L
Len Brown 已提交
5247 5248 5249

	progname = argv[0];

5250
	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5251
				long_options, &option_index)) != -1) {
L
Len Brown 已提交
5252
		switch (opt) {
5253 5254 5255
		case 'a':
			parse_add_command(optarg);
			break;
5256 5257 5258
		case 'c':
			parse_cpu_command(optarg);
			break;
5259
		case 'D':
5260 5261
			dump_only++;
			break;
5262 5263 5264 5265
		case 'e':
			/* --enable specified counter */
			bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			break;
5266 5267
		case 'd':
			debug++;
5268
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
L
Len Brown 已提交
5269
			break;
5270
		case 'H':
5271 5272 5273 5274 5275
			/*
			 * --hide: do not show those specified
			 *  multiple invocations simply clear more bits in enabled mask
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5276
			break;
5277 5278 5279 5280
		case 'h':
		default:
			help();
			exit(1);
L
Len Brown 已提交
5281
		case 'i':
5282 5283 5284 5285
			{
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
5286
					fprintf(outf, "interval %f seconds is too small\n",
5287 5288 5289 5290
						interval);
					exit(2);
				}

5291
				interval_tv.tv_sec = interval_ts.tv_sec = interval;
5292
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5293
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5294
			}
L
Len Brown 已提交
5295
			break;
5296 5297
		case 'J':
			rapl_joules++;
5298
			break;
5299
		case 'l':
5300
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5301 5302 5303
			list_header_only++;
			quiet++;
			break;
5304 5305 5306
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
5307 5308 5309
		case 'q':
			quiet = 1;
			break;
5310 5311 5312 5313 5314 5315 5316 5317 5318
		case 'n':
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n",
					num_iterations);
				exit(2);
			}
			break;
5319
		case 's':
5320 5321 5322 5323 5324 5325 5326 5327 5328 5329
			/*
			 * --show: show only those specified
			 *  The 1st invocation will clear and replace the enabled mask
			 *  subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
5330
			break;
5331 5332
		case 'S':
			summary_only++;
5333 5334 5335 5336
			break;
		case 'T':
			tcc_activation_temp_override = atoi(optarg);
			break;
5337 5338 5339
		case 'v':
			print_version();
			exit(0);
5340
			break;
L
Len Brown 已提交
5341 5342 5343 5344 5345 5346
		}
	}
}

int main(int argc, char **argv)
{
5347 5348
	outf = stderr;

L
Len Brown 已提交
5349 5350
	cmdline(argc, argv);

5351
	if (!quiet)
5352
		print_version();
L
Len Brown 已提交
5353

5354 5355
	probe_sysfs();

L
Len Brown 已提交
5356 5357
	turbostat_init();

5358 5359 5360 5361
	/* dump counters and exit */
	if (dump_only)
		return get_and_dump_counters();

5362 5363 5364 5365 5366 5367 5368
	/* list header and exit */
	if (list_header_only) {
		print_header(",");
		flush_output_stdout();
		return 0;
	}

L
Len Brown 已提交
5369 5370 5371 5372 5373 5374 5375 5376 5377 5378
	/*
	 * if any params left, it must be a command to fork
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}