turbostat.c 148.8 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
L
Len Brown 已提交
2 3
/*
 * turbostat -- show CPU frequency and C-state residency
4
 * on modern Intel and AMD processors.
L
Len Brown 已提交
5
 *
6
 * Copyright (c) 2013 Intel Corporation.
L
Len Brown 已提交
7 8 9
 * Len Brown <len.brown@intel.com>
 */

10
#define _GNU_SOURCE
11
#include MSRHEADER
12
#include INTEL_FAMILY_HEADER
13
#include <stdarg.h>
L
Len Brown 已提交
14
#include <stdio.h>
15
#include <err.h>
L
Len Brown 已提交
16 17 18 19
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
20
#include <sys/select.h>
L
Len Brown 已提交
21 22 23 24 25
#include <sys/resource.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
26
#include <getopt.h>
L
Len Brown 已提交
27 28 29
#include <dirent.h>
#include <string.h>
#include <ctype.h>
30
#include <sched.h>
31
#include <time.h>
32
#include <cpuid.h>
33 34
#include <linux/capability.h>
#include <errno.h>
35
#include <math.h>
L
Len Brown 已提交
36 37

/*
 * Process-wide state.
 * Objects without an explicit initializer have static storage duration
 * and therefore start out as zero / NULL.
 */
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;		/* per-cpu cache of open msr file descriptors, see get_msr_fd() */
struct timeval interval_tv = {5, 0};
struct timespec interval_ts = {5, 0};
struct timespec one_msec = {0, 1000000};
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;	/* outp is the append position used by the sprintf() callers */
unsigned int do_rapl;		/* mask of RAPL_* bits */
unsigned int do_dts;
unsigned int do_ptm;
unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
103

L
Len Brown 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116
/*
 * RAPL capability bits (tested against do_rapl).
 * The comment under each bit names the MSR(s) it stands for.
 */
#define RAPL_PKG		(1 << 0)
					/* 0x610 MSR_PKG_POWER_LIMIT */
					/* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
					/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO	(1 << 2)
					/* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM		(1 << 3)
					/* 0x618 MSR_DRAM_POWER_LIMIT */
					/* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
					/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO	(1 << 5)
					/* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT	(1 << 6)
					/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY	(1 << 7)
					/* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX		(1 << 8)
					/* 0x640 MSR_PP1_POWER_LIMIT */
					/* 0x641 MSR_PP1_ENERGY_STATUS */
					/* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
					/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_PER_CORE_ENERGY	(1 << 10)
					/* Indicates cores energy collection is per-core,
					 * not per-package. */
#define RAPL_AMD_F17H		(1 << 11)
					/* 0xc0010299 MSR_RAPL_PWR_UNIT */
					/* 0xc001029a MSR_CORE_ENERGY_STAT */
					/* 0xc001029b MSR_PKG_ENERGY_STAT */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define	TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

/* NOTE: evaluates the larger argument twice; do not pass expressions with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define	NAME_BYTES 20
#define PATH_BYTES 128
155

L
Len Brown 已提交
156 157 158
int backwards_count;
char *progname;

/*
 * CPU sets and their sizes in bytes, as passed to the CPU_*_S() macros
 * and to sched_setaffinity().
 */
#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
162 163
/* Capacity of the counter[] arrays that hold user-added counters. */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32

/*
 * Per-thread (logical CPU) counter snapshot.
 * thread_even / thread_odd point at the two sets of snapshots.
 */
struct thread_data {
	struct timeval tv_begin;	/* tv_end - tv_begin is shown in the "usec" column */
	struct timeval tv_end;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;		/* CPU_IS_FIRST_* bits below */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;

/* Per-core counter snapshot. */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	unsigned int core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;

/* Per-package counter snapshot. */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long cpu_lpi;
	unsigned long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;		/* signed, unlike the other counters */
	unsigned int gfx_mhz;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;

/* Argument triples for functions taking (thread_base, core_base, pkg_base). */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of the thread_data entry for (pkg_no, node_no, core_no, thread_no)
 * in an array laid out package-major, then node, then core, then thread,
 * using the per-level sizes recorded in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

/* Address of the core_data entry for (pkg_no, node_no, core_no). */
#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))

/*
 * Address of the pkg_data entry for pkg_no.
 * Both arguments are parenthesized so that an expression argument
 * (e.g. "a & b") is not re-associated with the '+'.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))

242
/* Classification of a counter: where it lives, what it counts, how it is shown. */
enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};

struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
249
	char path[PATH_BYTES];
250 251 252 253
	unsigned int width;
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
254 255 256
	unsigned int flags;
#define	FLAGS_HIDE	(1 << 0)
#define	FLAGS_SHOW	(1 << 1)
257
#define	SYSFS_PERCPU	(1 << 1)
258 259 260
};

/*
 * User-added counters: a count and a linked list head for each scope
 * (tp = thread, cp = core, pp = package).
 */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;
	struct msr_counter *cp;
	struct msr_counter *pp;
} sys;

269 270 271 272
/*
 * One thread/core/package record for the summary row.
 * format_counters() recognizes the summary row by comparing its thread
 * pointer against &average.threads.
 */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;
274

275 276
/* Topology identifiers of one logical CPU; 'cpus' points at the array of them. */
struct cpu_topology {
	int physical_package_id;
	int die_id;
	int logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
285 286 287

/*
 * System-wide topology totals and per-level sizes.
 * nodes_per_pkg, cores_per_node and threads_per_core are the strides
 * used by GET_THREAD()/GET_CORE() and the loop bounds in for_all_cpus().
 */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;

/* tv_delta supplies the interval length used by format_counters(). */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(void);

int cpu_is_not_present(int cpu)
306
{
307
	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
308
}
309
/*
310 311
 * run func(thread, core, package) in topology order
 * skip non-present cpus
312
 */
313 314 315

int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
316
{
317
	int retval, pkg_no, core_no, thread_no, node_no;
318

319
	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
320 321
		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
				for (thread_no = 0; thread_no <
					topo.threads_per_core; ++thread_no) {
					struct thread_data *t;
					struct core_data *c;
					struct pkg_data *p;

					t = GET_THREAD(thread_base, thread_no,
						       core_no, node_no,
						       pkg_no);

					if (cpu_is_not_present(t->cpu_id))
						continue;

					c = GET_CORE(core_base, core_no,
						     node_no, pkg_no);
					p = GET_PKG(pkg_base, pkg_no);

					retval = func(t, c, p);
					if (retval)
						return retval;
				}
343 344 345 346
			}
		}
	}
	return 0;
347 348 349 350
}

/*
 * Restrict the calling thread's CPU affinity to the single CPU 'cpu'
 * (pid 0 is passed to sched_setaffinity()).
 * Overwrites cpu_affinity_set.
 * Returns 0 on success, -1 if sched_setaffinity() fails.
 */
int cpu_migrate(int cpu)
{
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);

	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
		return -1;

	return 0;
}
358
int get_msr_fd(int cpu)
L
Len Brown 已提交
359 360 361 362
{
	char pathname[32];
	int fd;

363 364 365 366 367
	fd = fd_percpu[cpu];

	if (fd)
		return fd;

L
Len Brown 已提交
368 369
	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
	fd = open(pathname, O_RDONLY);
370
	if (fd < 0)
371
		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
L
Len Brown 已提交
372

373 374 375 376 377 378 379 380 381 382
	fd_percpu[cpu] = fd;

	return fd;
}

/*
 * Read the 8-byte MSR at 'offset' on 'cpu' into *msr.
 * Returns 0 on success; a short or failed read terminates the
 * program via err(), so the function never returns an error code.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t retval;

	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);

	if (retval != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}

390
/*
391 392 393 394
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 *    matching on them for --show and --hide.
395
 */
396
struct msr_counter bic[] = {
397 398
	{ 0x0, "usec" },
	{ 0x0, "Time_Of_Day_Seconds" },
399
	{ 0x0, "Package" },
400
	{ 0x0, "Node" },
401
	{ 0x0, "Avg_MHz" },
402
	{ 0x0, "Busy%" },
403 404 405
	{ 0x0, "Bzy_MHz" },
	{ 0x0, "TSC_MHz" },
	{ 0x0, "IRQ" },
406
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
407
	{ 0x0, "sysfs" },
408 409 410 411 412 413 414 415 416 417 418 419 420 421
	{ 0x0, "CPU%c1" },
	{ 0x0, "CPU%c3" },
	{ 0x0, "CPU%c6" },
	{ 0x0, "CPU%c7" },
	{ 0x0, "ThreadC" },
	{ 0x0, "CoreTmp" },
	{ 0x0, "CoreCnt" },
	{ 0x0, "PkgTmp" },
	{ 0x0, "GFX%rc6" },
	{ 0x0, "GFXMHz" },
	{ 0x0, "Pkg%pc2" },
	{ 0x0, "Pkg%pc3" },
	{ 0x0, "Pkg%pc6" },
	{ 0x0, "Pkg%pc7" },
422 423
	{ 0x0, "Pkg%pc8" },
	{ 0x0, "Pkg%pc9" },
424
	{ 0x0, "Pk%pc10" },
425 426
	{ 0x0, "CPU%LPI" },
	{ 0x0, "SYS%LPI" },
427 428 429 430 431 432 433 434 435 436 437
	{ 0x0, "PkgWatt" },
	{ 0x0, "CorWatt" },
	{ 0x0, "GFXWatt" },
	{ 0x0, "PkgCnt" },
	{ 0x0, "RAMWatt" },
	{ 0x0, "PKG_%" },
	{ 0x0, "RAM_%" },
	{ 0x0, "Pkg_J" },
	{ 0x0, "Cor_J" },
	{ 0x0, "GFX_J" },
	{ 0x0, "RAM_J" },
438
	{ 0x0, "Mod%c6" },
439 440 441 442
	{ 0x0, "Totl%C0" },
	{ 0x0, "Any%C0" },
	{ 0x0, "GFX%C0" },
	{ 0x0, "CPUGFX%" },
443 444
	{ 0x0, "Core" },
	{ 0x0, "CPU" },
445 446
	{ 0x0, "APIC" },
	{ 0x0, "X2APIC" },
447
	{ 0x0, "Die" },
448 449 450
};

#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
451 452 453
/*
 * One bit per built-in counter (column).
 * Bit N corresponds to entry N of the bic[] name table.
 */
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Node	(1ULL << 3)
#define	BIC_Avg_MHz	(1ULL << 4)
#define	BIC_Busy	(1ULL << 5)
#define	BIC_Bzy_MHz	(1ULL << 6)
#define	BIC_TSC_MHz	(1ULL << 7)
#define	BIC_IRQ		(1ULL << 8)
#define	BIC_SMI		(1ULL << 9)
#define	BIC_sysfs	(1ULL << 10)
#define	BIC_CPU_c1	(1ULL << 11)
#define	BIC_CPU_c3	(1ULL << 12)
#define	BIC_CPU_c6	(1ULL << 13)
#define	BIC_CPU_c7	(1ULL << 14)
#define	BIC_ThreadC	(1ULL << 15)
#define	BIC_CoreTmp	(1ULL << 16)
#define	BIC_CoreCnt	(1ULL << 17)
#define	BIC_PkgTmp	(1ULL << 18)
#define	BIC_GFX_rc6	(1ULL << 19)
#define	BIC_GFXMHz	(1ULL << 20)
#define	BIC_Pkgpc2	(1ULL << 21)
#define	BIC_Pkgpc3	(1ULL << 22)
#define	BIC_Pkgpc6	(1ULL << 23)
#define	BIC_Pkgpc7	(1ULL << 24)
#define	BIC_Pkgpc8	(1ULL << 25)
#define	BIC_Pkgpc9	(1ULL << 26)
#define	BIC_Pkgpc10	(1ULL << 27)
#define	BIC_CPU_LPI	(1ULL << 28)
#define	BIC_SYS_LPI	(1ULL << 29)
#define	BIC_PkgWatt	(1ULL << 30)
#define	BIC_CorWatt	(1ULL << 31)
#define	BIC_GFXWatt	(1ULL << 32)
#define	BIC_PkgCnt	(1ULL << 33)
#define	BIC_RAMWatt	(1ULL << 34)
#define	BIC_PKG__	(1ULL << 35)
#define	BIC_RAM__	(1ULL << 36)
#define	BIC_Pkg_J	(1ULL << 37)
#define	BIC_Cor_J	(1ULL << 38)
#define	BIC_GFX_J	(1ULL << 39)
#define	BIC_RAM_J	(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_Totl_c0	(1ULL << 42)
#define	BIC_Any_c0	(1ULL << 43)
#define	BIC_GFX_c0	(1ULL << 44)
#define	BIC_CPUGFX	(1ULL << 45)
#define	BIC_Core	(1ULL << 46)
#define	BIC_CPU		(1ULL << 47)
#define	BIC_APIC	(1ULL << 48)
#define	BIC_X2APIC	(1ULL << 49)
#define	BIC_Die		(1ULL << 50)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* columns the user has asked for: everything except the default-disabled ones */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* columns marked as available; other bits are added with BIC_PRESENT() */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * The macro argument is parenthesized in every expansion so that a
 * compound mask such as (BIC_A | BIC_B) is applied as a whole.
 * DO_BIC() is non-zero when a requested bit is both enabled and present.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
512

513

514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
/*
 * Names passed to bic_lookup() that match no built-in counter are
 * remembered here (HIDE_LIST mode only); deferred_skip_index counts them.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;

/*
 * Print the usage summary and option list to outf.
 */
void help(void)
{
	fprintf(outf,
	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
	"\n"
	"Turbostat forks the specified COMMAND and prints statistics\n"
	"when COMMAND completes.\n"
	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
	"to print statistics, until interrupted.\n"
	"  -a, --add	add a counter\n"
	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
	"		  {core | package | j,k,l..m,n-p }\n"
	"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
	"  -D, --Dump	displays the raw counter values\n"
	"  -e, --enable	[all | column]\n"
	"		shows all or the specified disabled column\n"
	"  -H, --hide [column|column,column,...]\n"
	"		hide the specified column(s)\n"
	"  -i, --interval sec.subsec\n"
	"		Override default 5-second measurement interval\n"
	"  -J, --Joules	displays energy in Joules instead of Watts\n"
	"  -l, --list	list column headers only\n"
	"  -n, --num_iterations num\n"
	"		number of the measurement iterations\n"
	"  -o, --out file\n"
	"		create or truncate \"file\" for all output\n"
	"  -q, --quiet	skip decoding system configuration header\n"
	"  -s, --show [column|column,column,...]\n"
	"		show only the specified column(s)\n"
	"  -S, --Summary\n"
	"		limits output to 1-line system summary per interval\n"
	"  -T, --TCC temperature\n"
	"		sets the Thermal Control Circuit temperature in\n"
	"		  degrees Celsius\n"
	"  -h, --help	print this help message\n"
	"  -v, --version	print version information\n"
	"\n"
	"For more help, run \"man turbostat\"\n");
}

565 566 567 568 569
/*
 * bic_lookup
 * for all the strings in comma separate name_list,
 * set the approprate bit in return value.
 */
570
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
571 572 573 574 575 576 577 578 579 580 581 582
{
	int i;
	unsigned long long retval = 0;

	while (name_list) {
		char *comma;

		comma = strchr(name_list, ',');

		if (comma)
			*comma = '\0';

583 584 585
		if (!strcmp(name_list, "all"))
			return ~0;

586 587 588 589 590 591 592
		for (i = 0; i < MAX_BIC; ++i) {
			if (!strcmp(name_list, bic[i].name)) {
				retval |= (1ULL << i);
				break;
			}
		}
		if (i == MAX_BIC) {
593 594 595 596 597 598 599 600 601 602 603 604 605
			if (mode == SHOW_LIST) {
				fprintf(stderr, "Invalid counter name: %s\n", name_list);
				exit(-1);
			}
			deferred_skip_names[deferred_skip_index++] = name_list;
			if (debug)
				fprintf(stderr, "deferred \"%s\"\n", name_list);
			if (deferred_skip_index >= MAX_DEFERRED) {
				fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
					MAX_DEFERRED, name_list);
				help();
				exit(1);
			}
606 607 608 609 610 611 612 613 614
		}

		name_list = comma;
		if (name_list)
			name_list++;

	}
	return retval;
}
615

616

617
void print_header(char *delim)
L
Len Brown 已提交
618
{
619
	struct msr_counter *mp;
620
	int printed = 0;
621

622 623 624 625
	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
626
	if (DO_BIC(BIC_Package))
627
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
628 629
	if (DO_BIC(BIC_Die))
		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
630 631
	if (DO_BIC(BIC_Node))
		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
632
	if (DO_BIC(BIC_Core))
633
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
634
	if (DO_BIC(BIC_CPU))
635
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
636 637 638 639
	if (DO_BIC(BIC_APIC))
		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_X2APIC))
		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
640
	if (DO_BIC(BIC_Avg_MHz))
641
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
642
	if (DO_BIC(BIC_Busy))
643
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
644
	if (DO_BIC(BIC_Bzy_MHz))
645
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
646
	if (DO_BIC(BIC_TSC_MHz))
647
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
648

649 650
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
651
			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
652
		else
653
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
654 655
	}

656
	if (DO_BIC(BIC_SMI))
657
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
658

659
	for (mp = sys.tp; mp; mp = mp->next) {
660

661 662
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
663
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
664
			else
665
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
666
		} else {
667
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
668
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
669
			else
670
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
671 672 673
		}
	}

674
	if (DO_BIC(BIC_CPU_c1))
675
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
676
	if (DO_BIC(BIC_CPU_c3))
677
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
678
	if (DO_BIC(BIC_CPU_c6))
679
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
680
	if (DO_BIC(BIC_CPU_c7))
681
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
682

683
	if (DO_BIC(BIC_Mod_c6))
684
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
685

686
	if (DO_BIC(BIC_CoreTmp))
687
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
688

689 690 691 692 693 694 695 696
	if (do_rapl && !rapl_joules) {
		if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
	} else if (do_rapl && rapl_joules) {
		if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
	}

697 698 699
	for (mp = sys.cp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
700
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
701
			else
702
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
703
		} else {
704 705 706 707
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
708 709 710
		}
	}

711
	if (DO_BIC(BIC_PkgTmp))
712
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
713

714
	if (DO_BIC(BIC_GFX_rc6))
715
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
L
Len Brown 已提交
716

717
	if (DO_BIC(BIC_GFXMHz))
718
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
L
Len Brown 已提交
719

720
	if (DO_BIC(BIC_Totl_c0))
721
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
722
	if (DO_BIC(BIC_Any_c0))
723
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
724
	if (DO_BIC(BIC_GFX_c0))
725
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
726
	if (DO_BIC(BIC_CPUGFX))
727
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
728

729
	if (DO_BIC(BIC_Pkgpc2))
730
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
731
	if (DO_BIC(BIC_Pkgpc3))
732
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
733
	if (DO_BIC(BIC_Pkgpc6))
734
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
735
	if (DO_BIC(BIC_Pkgpc7))
736
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
737
	if (DO_BIC(BIC_Pkgpc8))
738
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
739
	if (DO_BIC(BIC_Pkgpc9))
740
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
741
	if (DO_BIC(BIC_Pkgpc10))
742
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
743 744 745 746
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
L
Len Brown 已提交
747

748
	if (do_rapl && !rapl_joules) {
749
		if (DO_BIC(BIC_PkgWatt))
750
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
751
		if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
752
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
753
		if (DO_BIC(BIC_GFXWatt))
754
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
755
		if (DO_BIC(BIC_RAMWatt))
756
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
757
		if (DO_BIC(BIC_PKG__))
758
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
759
		if (DO_BIC(BIC_RAM__))
760
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
761
	} else if (do_rapl && rapl_joules) {
762
		if (DO_BIC(BIC_Pkg_J))
763
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
764
		if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
765
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
766
		if (DO_BIC(BIC_GFX_J))
767
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
768
		if (DO_BIC(BIC_RAM_J))
769
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
770
		if (DO_BIC(BIC_PKG__))
771
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
772
		if (DO_BIC(BIC_RAM__))
773
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
774
	}
775 776 777
	for (mp = sys.pp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
778
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
779
			else
780
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
781
		} else {
782 783 784 785
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
786 787 788
		}
	}

789
	outp += sprintf(outp, "\n");
L
Len Brown 已提交
790 791
}

792 793
int dump_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
794
{
795 796 797
	int i;
	struct msr_counter *mp;

798
	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
799 800

	if (t) {
801 802 803 804 805 806
		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
			t->cpu_id, t->flags);
		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
		outp += sprintf(outp, "c1: %016llX\n", t->c1);
807

808
		if (DO_BIC(BIC_IRQ))
809
			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
810
		if (DO_BIC(BIC_SMI))
811
			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
812 813 814 815 816

		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, t->counter[i]);
		}
817
	}
L
Len Brown 已提交
818

819
	if (c) {
820 821 822 823 824
		outp += sprintf(outp, "core: %d\n", c->core_id);
		outp += sprintf(outp, "c3: %016llX\n", c->c3);
		outp += sprintf(outp, "c6: %016llX\n", c->c6);
		outp += sprintf(outp, "c7: %016llX\n", c->c7);
		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
825
		outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
826 827 828 829 830

		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, c->counter[i]);
		}
831
		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
832
	}
L
Len Brown 已提交
833

834
	if (p) {
835
		outp += sprintf(outp, "package: %d\n", p->package_id);
836 837 838 839 840 841

		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);

842
		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
843
		if (DO_BIC(BIC_Pkgpc3))
844
			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
845
		if (DO_BIC(BIC_Pkgpc6))
846
			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
847
		if (DO_BIC(BIC_Pkgpc7))
848
			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
849 850 851
		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
852 853 854
		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
855 856 857 858 859 860 861 862 863
		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
		outp += sprintf(outp, "Throttle PKG: %0X\n",
			p->rapl_pkg_perf_status);
		outp += sprintf(outp, "Throttle RAM: %0X\n",
			p->rapl_dram_perf_status);
		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
864 865 866 867 868

		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
				i, mp->msr_num, p->counter[i]);
		}
869
	}
870 871 872

	outp += sprintf(outp, "\n");

873
	return 0;
L
Len Brown 已提交
874 875
}

L
Len Brown 已提交
876 877 878
/*
 * column formatting convention & formats
 */
879 880
int format_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
881
{
882
	double interval_float, tsc;
883
	char *fmt8;
884 885
	int i;
	struct msr_counter *mp;
886 887
	char *delim = "\t";
	int printed = 0;
L
Len Brown 已提交
888

889 890 891 892 893 894 895 896
	 /* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	 /* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

897 898 899 900 901
	/*if not summary line and --cpu is used */
	if ((t != &average.threads) &&
		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
		return 0;

902
	if (DO_BIC(BIC_USEC)) {
903 904 905 906 907 908 909
		/* on each row, print how many usec each timestamp took to gather */
		struct timeval tv;

		timersub(&t->tv_end, &t->tv_begin, &tv);
		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
	}

910 911 912 913
	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);

L
Len Brown 已提交
914 915
	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;

916 917
	tsc = t->tsc * tsc_tweak;

918 919
	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
920
		if (DO_BIC(BIC_Package))
921
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
922 923
		if (DO_BIC(BIC_Die))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
924 925
		if (DO_BIC(BIC_Node))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
926
		if (DO_BIC(BIC_Core))
927
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
928
		if (DO_BIC(BIC_CPU))
929
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
930 931 932 933
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
L
Len Brown 已提交
934
	} else {
935
		if (DO_BIC(BIC_Package)) {
936
			if (p)
937
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
938
			else
939
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
940
		}
941 942 943 944 945 946
		if (DO_BIC(BIC_Die)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
947 948 949 950 951 952 953 954 955
		if (DO_BIC(BIC_Node)) {
			if (t)
				outp += sprintf(outp, "%s%d",
						(printed++ ? delim : ""),
					      cpus[t->cpu_id].physical_node_id);
			else
				outp += sprintf(outp, "%s-",
						(printed++ ? delim : ""));
		}
956
		if (DO_BIC(BIC_Core)) {
957
			if (c)
958
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
959
			else
960
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
961
		}
962
		if (DO_BIC(BIC_CPU))
963
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
964 965 966 967
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
L
Len Brown 已提交
968
	}
969

970
	if (DO_BIC(BIC_Avg_MHz))
971
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
972 973
			1.0 / units * t->aperf / interval_float);

974
	if (DO_BIC(BIC_Busy))
975
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
L
Len Brown 已提交
976

977
	if (DO_BIC(BIC_Bzy_MHz)) {
978
		if (has_base_hz)
979
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
980
		else
981
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
982
				tsc / units * t->aperf / t->mperf / interval_float);
983
	}
L
Len Brown 已提交
984

985
	if (DO_BIC(BIC_TSC_MHz))
986
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
L
Len Brown 已提交
987

988
	/* IRQ */
989 990
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
991
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
992
		else
993
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
994
	}
995

996
	/* SMI */
997
	if (DO_BIC(BIC_SMI))
998
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
999

1000
	/* Added counters */
1001 1002 1003
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
1004
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
1005
			else
1006
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1007
		} else if (mp->format == FORMAT_DELTA) {
1008
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1009
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1010
			else
1011
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1012
		} else if (mp->format == FORMAT_PERCENT) {
1013
			if (mp->type == COUNTER_USEC)
1014
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
1015
			else
1016
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
1017 1018 1019
		}
	}

1020 1021
	/* C1 */
	if (DO_BIC(BIC_CPU_c1))
1022
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1023 1024


1025 1026 1027 1028
	/* print per-core data only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		goto done;

1029
	if (DO_BIC(BIC_CPU_c3))
1030
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1031
	if (DO_BIC(BIC_CPU_c6))
1032
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1033
	if (DO_BIC(BIC_CPU_c7))
1034
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1035

1036 1037
	/* Mod%c6 */
	if (DO_BIC(BIC_Mod_c6))
1038
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1039

1040
	if (DO_BIC(BIC_CoreTmp))
1041
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1042

1043 1044 1045
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
1046
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1047
			else
1048
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1049
		} else if (mp->format == FORMAT_DELTA) {
1050
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1051
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1052
			else
1053
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1054
		} else if (mp->format == FORMAT_PERCENT) {
1055
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1056 1057 1058
		}
	}

1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
	/*
	 * If measurement interval exceeds minimum RAPL Joule Counter range,
	 * indicate that results are suspect by printing "**" in fraction place.
	 */
	if (interval_float < rapl_joule_counter_range)
		fmt8 = "%s%.2f";
	else
		fmt8 = "%6.0f**";

	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
	if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);

1073 1074 1075 1076
	/* print per-package data only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		goto done;

1077
	/* PkgTmp */
1078
	if (DO_BIC(BIC_PkgTmp))
1079
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1080

L
Len Brown 已提交
1081
	/* GFXrc6 */
1082
	if (DO_BIC(BIC_GFX_rc6)) {
1083
		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
1084
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1085
		} else {
1086
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1087 1088 1089
				p->gfx_rc6_ms / 10.0 / interval_float);
		}
	}
L
Len Brown 已提交
1090

L
Len Brown 已提交
1091
	/* GFXMHz */
1092
	if (DO_BIC(BIC_GFXMHz))
1093
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
L
Len Brown 已提交
1094

1095
	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1096
	if (DO_BIC(BIC_Totl_c0))
1097
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1098
	if (DO_BIC(BIC_Any_c0))
1099
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1100
	if (DO_BIC(BIC_GFX_c0))
1101
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1102
	if (DO_BIC(BIC_CPUGFX))
1103
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1104

1105
	if (DO_BIC(BIC_Pkgpc2))
1106
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1107
	if (DO_BIC(BIC_Pkgpc3))
1108
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1109
	if (DO_BIC(BIC_Pkgpc6))
1110
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1111
	if (DO_BIC(BIC_Pkgpc7))
1112
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1113
	if (DO_BIC(BIC_Pkgpc8))
1114
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1115
	if (DO_BIC(BIC_Pkgpc9))
1116
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1117
	if (DO_BIC(BIC_Pkgpc10))
1118
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1119

1120 1121 1122 1123 1124
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);

1125
	if (DO_BIC(BIC_PkgWatt))
1126
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1127
	if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1128
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1129
	if (DO_BIC(BIC_GFXWatt))
1130
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1131
	if (DO_BIC(BIC_RAMWatt))
1132
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1133
	if (DO_BIC(BIC_Pkg_J))
1134
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1135
	if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1136
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1137
	if (DO_BIC(BIC_GFX_J))
1138
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1139
	if (DO_BIC(BIC_RAM_J))
1140
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1141
	if (DO_BIC(BIC_PKG__))
1142
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1143
	if (DO_BIC(BIC_RAM__))
1144
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1145

1146 1147 1148
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
1149
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1150
			else
1151
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1152
		} else if (mp->format == FORMAT_DELTA) {
1153
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1154
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1155
			else
1156
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1157
		} else if (mp->format == FORMAT_PERCENT) {
1158
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1159 1160 1161
		}
	}

1162
done:
1163 1164
	if (*(outp - 1) != '\n')
		outp += sprintf(outp, "\n");
1165 1166

	return 0;
L
Len Brown 已提交
1167 1168
}

1169
void flush_output_stdout(void)
1170
{
1171 1172 1173 1174 1175 1176 1177 1178 1179 1180
	FILE *filep;

	if (outf == stderr)
		filep = stdout;
	else
		filep = outf;

	fputs(output_buffer, filep);
	fflush(filep);

1181 1182
	outp = output_buffer;
}
1183
void flush_output_stderr(void)
1184
{
1185 1186
	fputs(output_buffer, outf);
	fflush(outf);
1187 1188 1189
	outp = output_buffer;
}
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
L
Len Brown 已提交
1190
{
L
Len Brown 已提交
1191
	static int printed;
L
Len Brown 已提交
1192

L
Len Brown 已提交
1193
	if (!printed || !summary_only)
1194
		print_header("\t");
L
Len Brown 已提交
1195

1196
	format_counters(&average.threads, &average.cores, &average.packages);
L
Len Brown 已提交
1197

L
Len Brown 已提交
1198 1199 1200 1201 1202
	printed = 1;

	if (summary_only)
		return;

1203
	for_all_cpus(format_counters, t, c, p);
L
Len Brown 已提交
1204 1205
}

1206 1207 1208 1209 1210 1211 1212
/*
 * DELTA_WRAP32(new, old)
 *
 * Replace "old" with the distance from "old" to "new", adding 2^32 when
 * "new" is not greater than "old" (i.e. the counter is taken to have
 * wrapped once).
 *
 * Wrapped in do { } while (0) so that it expands to a single statement and
 * is safe inside an unbraced if/else.  Both arguments are evaluated more
 * than once, so they must be free of side effects.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000 + (new) - (old);	\
		}						\
	} while (0)

1213
int
1214 1215
delta_package(struct pkg_data *new, struct pkg_data *old)
{
1216 1217
	int i;
	struct msr_counter *mp;
1218

1219 1220

	if (DO_BIC(BIC_Totl_c0))
1221
		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1222
	if (DO_BIC(BIC_Any_c0))
1223
		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1224
	if (DO_BIC(BIC_GFX_c0))
1225
		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1226
	if (DO_BIC(BIC_CPUGFX))
1227
		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1228

1229
	old->pc2 = new->pc2 - old->pc2;
1230
	if (DO_BIC(BIC_Pkgpc3))
1231
		old->pc3 = new->pc3 - old->pc3;
1232
	if (DO_BIC(BIC_Pkgpc6))
1233
		old->pc6 = new->pc6 - old->pc6;
1234
	if (DO_BIC(BIC_Pkgpc7))
1235
		old->pc7 = new->pc7 - old->pc7;
1236 1237 1238
	old->pc8 = new->pc8 - old->pc8;
	old->pc9 = new->pc9 - old->pc9;
	old->pc10 = new->pc10 - old->pc10;
1239 1240
	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1241 1242
	old->pkg_temp_c = new->pkg_temp_c;

1243 1244 1245 1246 1247 1248
	/* flag an error when rc6 counter resets/wraps */
	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
		old->gfx_rc6_ms = -1;
	else
		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

L
Len Brown 已提交
1249 1250
	old->gfx_mhz = new->gfx_mhz;

1251 1252 1253 1254 1255 1256
	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
	DELTA_WRAP32(new->energy_cores, old->energy_cores);
	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
	DELTA_WRAP32(new->energy_dram, old->energy_dram);
	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1257

1258 1259 1260 1261 1262 1263 1264
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

1265
	return 0;
1266
}
L
Len Brown 已提交
1267

1268 1269
void
delta_core(struct core_data *new, struct core_data *old)
L
Len Brown 已提交
1270
{
1271 1272 1273
	int i;
	struct msr_counter *mp;

1274 1275 1276
	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
1277
	old->core_temp_c = new->core_temp_c;
1278
	old->mc6_us = new->mc6_us - old->mc6_us;
1279

1280 1281
	DELTA_WRAP32(new->core_energy, old->core_energy);

1282 1283 1284 1285 1286 1287
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
1288
}
L
Len Brown 已提交
1289

1290 1291 1292
/*
 * old = new - old
 */
1293
int
1294 1295 1296
delta_thread(struct thread_data *new, struct thread_data *old,
	struct core_data *core_delta)
{
1297 1298 1299
	int i;
	struct msr_counter *mp;

1300 1301 1302 1303 1304 1305
	/* we run cpuid just the 1st time, copy the results */
	if (DO_BIC(BIC_APIC))
		new->apic_id = old->apic_id;
	if (DO_BIC(BIC_X2APIC))
		new->x2apic_id = old->x2apic_id;

1306 1307 1308 1309 1310 1311 1312 1313 1314
	/*
	 * the timestamps from start of measurement interval are in "old"
	 * the timestamp from end of measurement interval are in "new"
	 * over-write old w/ new so we can print end of interval values
	 */

	old->tv_begin = new->tv_begin;
	old->tv_end = new->tv_end;

1315 1316 1317
	old->tsc = new->tsc - old->tsc;

	/* check for TSC < 1 Mcycles over interval */
1318 1319 1320 1321
	if (old->tsc < (1000 * 1000))
		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
		     "You can disable all c-states by booting with \"idle=poll\"\n"
		     "or just the deep ones with \"processor.max_cstate=1\"");
L
Len Brown 已提交
1322

1323
	old->c1 = new->c1 - old->c1;
L
Len Brown 已提交
1324

1325
	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1326 1327 1328 1329
		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
			old->aperf = new->aperf - old->aperf;
			old->mperf = new->mperf - old->mperf;
		} else {
1330
			return -1;
L
Len Brown 已提交
1331
		}
1332
	}
L
Len Brown 已提交
1333 1334


1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345
	if (use_c1_residency_msr) {
		/*
		 * Some models have a dedicated C1 residency MSR,
		 * which should be more accurate than the derivation below.
		 */
	} else {
		/*
		 * As counter collection is not atomic,
		 * it is possible for mperf's non-halted cycles + idle states
		 * to exceed TSC's all cycles: show c1 = 0% in that case.
		 */
1346
		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1347 1348 1349
			old->c1 = 0;
		else {
			/* normal case, derive c1 */
1350
			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1351
				- core_delta->c6 - core_delta->c7;
1352
		}
1353
	}
1354

1355
	if (old->mperf == 0) {
1356 1357
		if (debug > 1)
			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1358
		old->mperf = 1;	/* divide by 0 protection */
L
Len Brown 已提交
1359
	}
1360

1361
	if (DO_BIC(BIC_IRQ))
1362 1363
		old->irq_count = new->irq_count - old->irq_count;

1364
	if (DO_BIC(BIC_SMI))
1365
		old->smi_count = new->smi_count - old->smi_count;
1366

1367 1368 1369 1370 1371 1372
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}
1373
	return 0;
1374 1375 1376 1377 1378 1379
}

int delta_cpu(struct thread_data *t, struct core_data *c,
	struct pkg_data *p, struct thread_data *t2,
	struct core_data *c2, struct pkg_data *p2)
{
1380 1381
	int retval = 0;

1382 1383 1384 1385 1386
	/* calculate core delta only for 1st thread in core */
	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
		delta_core(c, c2);

	/* always calculate thread delta */
1387 1388 1389
	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
	if (retval)
		return retval;
1390 1391 1392

	/* calculate package delta only for 1st core in package */
	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1393
		retval = delta_package(p, p2);
1394

1395
	return retval;
L
Len Brown 已提交
1396 1397
}

1398 1399
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
1400 1401 1402
	int i;
	struct msr_counter  *mp;

1403 1404 1405 1406 1407
	t->tv_begin.tv_sec = 0;
	t->tv_begin.tv_usec = 0;
	t->tv_end.tv_sec = 0;
	t->tv_end.tv_usec = 0;

1408 1409 1410 1411 1412
	t->tsc = 0;
	t->aperf = 0;
	t->mperf = 0;
	t->c1 = 0;

1413 1414 1415
	t->irq_count = 0;
	t->smi_count = 0;

1416 1417 1418 1419 1420 1421
	/* tells format_counters to dump all fields from this set */
	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;

	c->c3 = 0;
	c->c6 = 0;
	c->c7 = 0;
1422
	c->mc6_us = 0;
1423
	c->core_temp_c = 0;
1424
	c->core_energy = 0;
1425

1426 1427 1428 1429 1430
	p->pkg_wtd_core_c0 = 0;
	p->pkg_any_core_c0 = 0;
	p->pkg_any_gfxe_c0 = 0;
	p->pkg_both_core_gfxe_c0 = 0;

1431
	p->pc2 = 0;
1432
	if (DO_BIC(BIC_Pkgpc3))
1433
		p->pc3 = 0;
1434
	if (DO_BIC(BIC_Pkgpc6))
1435
		p->pc6 = 0;
1436
	if (DO_BIC(BIC_Pkgpc7))
1437
		p->pc7 = 0;
1438 1439 1440
	p->pc8 = 0;
	p->pc9 = 0;
	p->pc10 = 0;
1441 1442
	p->cpu_lpi = 0;
	p->sys_lpi = 0;
1443 1444 1445 1446 1447 1448 1449 1450

	p->energy_pkg = 0;
	p->energy_dram = 0;
	p->energy_cores = 0;
	p->energy_gfx = 0;
	p->rapl_pkg_perf_status = 0;
	p->rapl_dram_perf_status = 0;
	p->pkg_temp_c = 0;
L
Len Brown 已提交
1451

L
Len Brown 已提交
1452
	p->gfx_rc6_ms = 0;
L
Len Brown 已提交
1453
	p->gfx_mhz = 0;
1454 1455 1456 1457 1458 1459 1460 1461
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
		t->counter[i] = 0;

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
		c->counter[i] = 0;

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
		p->counter[i] = 0;
1462 1463 1464
}
int sum_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
L
Len Brown 已提交
1465
{
1466 1467 1468
	int i;
	struct msr_counter *mp;

1469 1470 1471 1472 1473 1474
	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		average.threads.apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		average.threads.x2apic_id = t->x2apic_id;

1475 1476 1477 1478 1479 1480 1481
	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = t->tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

1482 1483 1484 1485
	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;
L
Len Brown 已提交
1486

1487 1488 1489
	average.threads.irq_count += t->irq_count;
	average.threads.smi_count += t->smi_count;

1490 1491 1492 1493 1494 1495
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

1496 1497 1498
	/* sum per-core values only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;
L
Len Brown 已提交
1499

1500 1501 1502
	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
1503
	average.cores.mc6_us += c->mc6_us;
1504

1505 1506
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);

1507 1508
	average.cores.core_energy += c->core_energy;

1509 1510 1511 1512 1513 1514
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

1515 1516 1517 1518
	/* sum per-pkg values only for 1st core in pkg */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

1519
	if (DO_BIC(BIC_Totl_c0))
1520
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1521
	if (DO_BIC(BIC_Any_c0))
1522
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1523
	if (DO_BIC(BIC_GFX_c0))
1524
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1525
	if (DO_BIC(BIC_CPUGFX))
1526 1527
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

1528
	average.packages.pc2 += p->pc2;
1529
	if (DO_BIC(BIC_Pkgpc3))
1530
		average.packages.pc3 += p->pc3;
1531
	if (DO_BIC(BIC_Pkgpc6))
1532
		average.packages.pc6 += p->pc6;
1533
	if (DO_BIC(BIC_Pkgpc7))
1534
		average.packages.pc7 += p->pc7;
1535 1536 1537
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;
1538

1539 1540 1541
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

1542 1543 1544 1545 1546
	average.packages.energy_pkg += p->energy_pkg;
	average.packages.energy_dram += p->energy_dram;
	average.packages.energy_cores += p->energy_cores;
	average.packages.energy_gfx += p->energy_gfx;

L
Len Brown 已提交
1547
	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
L
Len Brown 已提交
1548 1549
	average.packages.gfx_mhz = p->gfx_mhz;

1550 1551 1552 1553
	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1554 1555 1556 1557 1558 1559

	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.packages.counter[i] += p->counter[i];
	}
1560 1561 1562 1563 1564 1565 1566 1567 1568
	return 0;
}
/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 */
void compute_average(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
1569 1570 1571
	int i;
	struct msr_counter *mp;

1572 1573 1574 1575 1576 1577 1578 1579 1580
	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	average.threads.tsc /= topo.num_cpus;
	average.threads.aperf /= topo.num_cpus;
	average.threads.mperf /= topo.num_cpus;
	average.threads.c1 /= topo.num_cpus;

1581 1582 1583
	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;

1584 1585 1586
	average.cores.c3 /= topo.num_cores;
	average.cores.c6 /= topo.num_cores;
	average.cores.c7 /= topo.num_cores;
1587
	average.cores.mc6_us /= topo.num_cores;
1588

1589
	if (DO_BIC(BIC_Totl_c0))
1590
		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1591
	if (DO_BIC(BIC_Any_c0))
1592
		average.packages.pkg_any_core_c0 /= topo.num_packages;
1593
	if (DO_BIC(BIC_GFX_c0))
1594
		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1595
	if (DO_BIC(BIC_CPUGFX))
1596 1597
		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;

1598
	average.packages.pc2 /= topo.num_packages;
1599
	if (DO_BIC(BIC_Pkgpc3))
1600
		average.packages.pc3 /= topo.num_packages;
1601
	if (DO_BIC(BIC_Pkgpc6))
1602
		average.packages.pc6 /= topo.num_packages;
1603
	if (DO_BIC(BIC_Pkgpc7))
1604
		average.packages.pc7 /= topo.num_packages;
1605 1606 1607 1608

	average.packages.pc8 /= topo.num_packages;
	average.packages.pc9 /= topo.num_packages;
	average.packages.pc10 /= topo.num_packages;
1609 1610 1611 1612

	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1613 1614 1615
		if (mp->type == COUNTER_ITEMS) {
			if (average.threads.counter[i] > 9999999)
				sums_need_wide_columns = 1;
1616
			continue;
1617
		}
1618 1619 1620 1621 1622
		average.threads.counter[i] /= topo.num_cpus;
	}
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1623 1624 1625 1626
		if (mp->type == COUNTER_ITEMS) {
			if (average.cores.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
1627 1628 1629 1630 1631
		average.cores.counter[i] /= topo.num_cores;
	}
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
1632 1633 1634 1635
		if (mp->type == COUNTER_ITEMS) {
			if (average.packages.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
1636 1637
		average.packages.counter[i] /= topo.num_packages;
	}
L
Len Brown 已提交
1638 1639
}

1640
static unsigned long long rdtsc(void)
L
Len Brown 已提交
1641
{
1642
	unsigned int low, high;
1643

1644
	asm volatile("rdtsc" : "=a" (low), "=d" (high));
1645

1646 1647
	return low | ((unsigned long long)high) << 32;
}
1648

1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: if the file cannot be opened,
 * the program exits with status 1 via err().
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * return snapshot of given counter
 *
 * Opens "path", parses one integer from it and returns the value.
 * Exits with status 1 if the file cannot be opened (via fopen_or_die())
 * or if no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* %llu, not %lld: the destination is an unsigned long long */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}

int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
	if (mp->msr_num != 0) {
		if (get_msr(cpu, mp->msr_num, counterp))
			return -1;
	} else {
1688
		char path[128 + PATH_BYTES];
1689 1690 1691 1692 1693 1694 1695 1696 1697

		if (mp->flags & SYSFS_PERCPU) {
			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
				 cpu, mp->path);

			*counterp = snapshot_sysfs_counter(path);
		} else {
			*counterp = snapshot_sysfs_counter(mp->path);
		}
1698 1699 1700 1701 1702
	}

	return 0;
}

1703 1704
void get_apic_id(struct thread_data *t)
{
1705
	unsigned int eax, ebx, ecx, edx;
1706

1707 1708 1709
	if (DO_BIC(BIC_APIC)) {
		eax = ebx = ecx = edx = 0;
		__cpuid(1, eax, ebx, ecx, edx);
1710

1711 1712 1713 1714
		t->apic_id = (ebx >> 24) & 0xff;
	}

	if (!DO_BIC(BIC_X2APIC))
1715 1716
		return;

1717 1718
	if (authentic_amd) {
		unsigned int topology_extensions;
1719

1720 1721
		if (max_extended_level < 0x8000001e)
			return;
1722

1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733
		eax = ebx = ecx = edx = 0;
		__cpuid(0x80000001, eax, ebx, ecx, edx);
			topology_extensions = ecx & (1 << 22);

		if (topology_extensions == 0)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x8000001e, eax, ebx, ecx, edx);

		t->x2apic_id = eax;
1734
		return;
1735
	}
1736

1737 1738 1739 1740
	if (!genuine_intel)
		return;

	if (max_level < 0xb)
1741 1742 1743 1744 1745 1746
		return;

	ecx = 0;
	__cpuid(0xb, eax, ebx, ecx, edx);
	t->x2apic_id = edx;

1747 1748 1749
	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
				t->cpu_id, t->apic_id, t->x2apic_id);
1750 1751
}

1752 1753 1754 1755 1756 1757 1758 1759
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
1760
	unsigned long long msr;
1761
	int aperf_mperf_retry_count = 0;
1762 1763
	struct msr_counter *mp;
	int i;
1764

1765 1766
	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

1767
	if (cpu_migrate(cpu)) {
1768
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1769
		return -1;
1770
	}
1771

1772 1773
	if (first_counter_read)
		get_apic_id(t);
1774
retry:
1775 1776
	t->tsc = rdtsc();	/* we are running on local CPU of interest */

1777
	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801
		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;

		/*
		 * The TSC, APERF and MPERF must be read together for
		 * APERF/MPERF and MPERF/TSC to give accurate results.
		 *
		 * Unfortunately, APERF and MPERF are read by
		 * individual system call, so delays may occur
		 * between them.  If the time to read them
		 * varies by a large amount, we re-read them.
		 */

		/*
		 * This initial dummy APERF read has been seen to
		 * reduce jitter in the subsequent reads.
		 */

		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		t->tsc = rdtsc();	/* re-read close to APERF */

		tsc_before = t->tsc;

1802
		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1803
			return -3;
1804 1805 1806

		tsc_between = rdtsc();

1807
		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1808
			return -4;
1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828

		tsc_after = rdtsc();

		aperf_time = tsc_between - tsc_before;
		mperf_time = tsc_after - tsc_between;

		/*
		 * If the system call latency to read APERF and MPERF
		 * differ by more than 2x, then try again.
		 */
		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
			aperf_mperf_retry_count++;
			if (aperf_mperf_retry_count < 5)
				goto retry;
			else
				warnx("cpu%d jitter %lld %lld",
					cpu, aperf_time, mperf_time);
		}
		aperf_mperf_retry_count = 0;

1829 1830
		t->aperf = t->aperf * aperf_mperf_multiplier;
		t->mperf = t->mperf * aperf_mperf_multiplier;
1831 1832
	}

1833
	if (DO_BIC(BIC_IRQ))
1834
		t->irq_count = irqs_per_cpu[cpu];
1835
	if (DO_BIC(BIC_SMI)) {
1836 1837 1838 1839
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		t->smi_count = msr & 0xFFFFFFFF;
	}
1840
	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1841 1842 1843 1844
		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
			return -6;
	}

1845
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1846
		if (get_mp(cpu, mp, &t->counter[i]))
1847 1848 1849
			return -10;
	}

1850 1851
	/* collect core counters only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1852
		goto done;
1853

1854
	if (DO_BIC(BIC_CPU_c3)) {
1855 1856
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
1857 1858
	}

1859
	if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1860 1861
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
1862 1863 1864
	} else if (do_knl_cstates) {
		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
			return -7;
1865 1866
	}

1867
	if (DO_BIC(BIC_CPU_c7))
1868 1869 1870
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;

1871 1872 1873 1874
	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

1875
	if (DO_BIC(BIC_CoreTmp)) {
1876 1877 1878 1879 1880
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

1881 1882 1883 1884 1885 1886
	if (do_rapl & RAPL_AMD_F17H) {
		if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
			return -14;
		c->core_energy = msr & 0xFFFFFFFF;
	}

1887
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1888
		if (get_mp(cpu, mp, &c->counter[i]))
1889 1890
			return -10;
	}
1891

1892 1893
	/* collect package counters only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1894
		goto done;
1895

1896
	if (DO_BIC(BIC_Totl_c0)) {
1897 1898
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
1899 1900
	}
	if (DO_BIC(BIC_Any_c0)) {
1901 1902
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
1903 1904
	}
	if (DO_BIC(BIC_GFX_c0)) {
1905 1906
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
1907 1908
	}
	if (DO_BIC(BIC_CPUGFX)) {
1909 1910 1911
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}
1912
	if (DO_BIC(BIC_Pkgpc3))
1913 1914
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
1915
	if (DO_BIC(BIC_Pkgpc6)) {
1916 1917 1918 1919 1920 1921 1922 1923 1924
		if (do_slm_cstates) {
			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		} else {
			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		}
	}

1925
	if (DO_BIC(BIC_Pkgpc2))
1926 1927
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
1928
	if (DO_BIC(BIC_Pkgpc7))
1929 1930
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
1931
	if (DO_BIC(BIC_Pkgpc8))
1932 1933
		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
			return -13;
1934
	if (DO_BIC(BIC_Pkgpc9))
1935 1936
		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
			return -13;
1937
	if (DO_BIC(BIC_Pkgpc10))
1938 1939
		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
			return -13;
1940

1941 1942 1943 1944 1945
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

1946 1947 1948 1949 1950
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
1951
	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975
		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
			return -14;
		p->energy_cores = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
			return -15;
		p->energy_dram = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
			return -16;
		p->energy_gfx = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_PKG_PERF_STATUS) {
		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
			return -16;
		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
			return -16;
		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
	}
1976 1977 1978 1979 1980
	if (do_rapl & RAPL_AMD_F17H) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
1981
	if (DO_BIC(BIC_PkgTmp)) {
1982 1983 1984 1985
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}
L
Len Brown 已提交
1986

1987
	if (DO_BIC(BIC_GFX_rc6))
L
Len Brown 已提交
1988 1989
		p->gfx_rc6_ms = gfx_cur_rc6_ms;

1990
	if (DO_BIC(BIC_GFXMHz))
L
Len Brown 已提交
1991 1992
		p->gfx_mhz = gfx_cur_mhz;

1993
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1994
		if (get_mp(cpu, mp, &p->counter[i]))
1995 1996
			return -10;
	}
1997 1998
done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);
1999

2000
	return 0;
L
Len Brown 已提交
2001 2002
}

2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* value.  Designated initializers tie each
 * string to its index so the table cannot drift from the defines above
 * (PCLUKN must print "unknown" and PCLRSV "reserved").
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCL_10] = "pc10",
	[PCLUNL] = "unlimited",
};

/* Per-platform tables: each has 16 entries holding PCL* values. */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};

/*
 * calculate_tsc_tweak()
 * Set the global tsc_tweak to the ratio base_hz / tsc_hz.
 */
static void
calculate_tsc_tweak(void)
{
	tsc_tweak = base_hz / tsc_hz;
}

static void
dump_nhm_platform_info(void)
L
Len Brown 已提交
2048 2049 2050 2051
{
	unsigned long long msr;
	unsigned int ratio;

2052
	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
L
Len Brown 已提交
2053

2054
	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2055

L
Len Brown 已提交
2056
	ratio = (msr >> 40) & 0xFF;
2057
	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
L
Len Brown 已提交
2058 2059 2060
		ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
2061
	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
L
Len Brown 已提交
2062 2063
		ratio, bclk, ratio * bclk);

2064
	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2065
	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2066
		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2067

2068 2069 2070 2071 2072 2073 2074 2075 2076
	return;
}

static void
dump_hsw_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;

2077
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2078

2079
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2080 2081 2082

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
2083
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2084 2085 2086 2087
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
2088
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2089 2090 2091 2092 2093 2094 2095 2096 2097
			ratio, bclk, ratio * bclk);
	return;
}

static void
dump_ivt_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;
2098

2099
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2100

2101
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2102 2103 2104

	ratio = (msr >> 56) & 0xFF;
	if (ratio)
2105
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2106 2107 2108 2109
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 48) & 0xFF;
	if (ratio)
2110
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2111 2112 2113 2114
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 40) & 0xFF;
	if (ratio)
2115
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2116 2117 2118 2119
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 32) & 0xFF;
	if (ratio)
2120
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2121 2122 2123 2124
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 24) & 0xFF;
	if (ratio)
2125
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2126 2127 2128 2129
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0xFF;
	if (ratio)
2130
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2131 2132 2133 2134
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
2135
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2136 2137 2138 2139
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
2140
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2141
			ratio, bclk, ratio * bclk);
2142 2143
	return;
}
2144 2145 2146 2147 2148 2149 2150 2151 2152
int has_turbo_ratio_group_limits(int family, int model)
{

	if (!genuine_intel)
		return 0;

	switch (model) {
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_SKYLAKE_X:
2153
	case INTEL_FAM6_ATOM_GOLDMONT_X:
2154 2155 2156 2157
		return 1;
	}
	return 0;
}
2158

2159
static void
2160
dump_turbo_ratio_limits(int family, int model)
2161
{
2162 2163
	unsigned long long msr, core_counts;
	unsigned int ratio, group_size;
L
Len Brown 已提交
2164

2165
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2166
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2167

2168 2169 2170 2171 2172 2173 2174
	if (has_turbo_ratio_group_limits(family, model)) {
		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
	} else {
		core_counts = 0x0807060504030201;
	}

2175
	ratio = (msr >> 56) & 0xFF;
2176
	group_size = (core_counts >> 56) & 0xFF;
2177
	if (ratio)
2178 2179
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2180 2181

	ratio = (msr >> 48) & 0xFF;
2182
	group_size = (core_counts >> 48) & 0xFF;
2183
	if (ratio)
2184 2185
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2186 2187

	ratio = (msr >> 40) & 0xFF;
2188
	group_size = (core_counts >> 40) & 0xFF;
2189
	if (ratio)
2190 2191
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2192 2193

	ratio = (msr >> 32) & 0xFF;
2194
	group_size = (core_counts >> 32) & 0xFF;
2195
	if (ratio)
2196 2197
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2198

L
Len Brown 已提交
2199
	ratio = (msr >> 24) & 0xFF;
2200
	group_size = (core_counts >> 24) & 0xFF;
L
Len Brown 已提交
2201
	if (ratio)
2202 2203
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2204 2205

	ratio = (msr >> 16) & 0xFF;
2206
	group_size = (core_counts >> 16) & 0xFF;
L
Len Brown 已提交
2207
	if (ratio)
2208 2209
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2210 2211

	ratio = (msr >> 8) & 0xFF;
2212
	group_size = (core_counts >> 8) & 0xFF;
L
Len Brown 已提交
2213
	if (ratio)
2214 2215
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
L
Len Brown 已提交
2216 2217

	ratio = (msr >> 0) & 0xFF;
2218
	group_size = (core_counts >> 0) & 0xFF;
L
Len Brown 已提交
2219
	if (ratio)
2220 2221
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
			ratio, bclk, ratio * bclk, group_size);
2222 2223
	return;
}
2224

2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272
/*
 * dump_atom_turbo_ratio_limits()
 * Print MSR_ATOM_CORE_RATIOS and MSR_ATOM_CORE_TURBO_RATIOS as read on
 * base_cpu (low 32 bits only), and decode their 6-bit ratio fields.
 * A ratio field of zero is not printed.
 */
static void
dump_atom_turbo_ratio_limits(void)
{
	unsigned long long msr;
	unsigned int ratio;

	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
			ratio, bclk, ratio * bclk);

	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);

	ratio = (msr >> 24) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0x3F;
	if (ratio)
		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
			ratio, bclk, ratio * bclk);
}

static void
dump_knl_turbo_ratio_limits(void)
{
2276 2277
	const unsigned int buckets_no = 7;

2278
	unsigned long long msr;
2279 2280 2281 2282
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];
2283

2284
	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2285

2286
	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2287
		base_cpu, msr);
2288 2289 2290

	/**
	 * Turbo encoding in KNL is as follows:
2291 2292
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */
2311 2312 2313 2314 2315 2316

	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;
	ratio[b_nr] = (msr >> 8) & 0xFF;

	for (i = 16; i < 64; i += 8) {
2317
		delta_cores = (msr >> i) & 0x1F;
2318 2319 2320 2321 2322
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
2323
	}
2324 2325 2326

	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2327
			fprintf(outf,
2328
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2329
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2330 2331
}

2332 2333 2334 2335 2336
static void
dump_nhm_cst_cfg(void)
{
	unsigned long long msr;

2337
	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2338

2339
	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2340

2341
	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2342 2343 2344 2345 2346
		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
		(msr & (1 << 15)) ? "" : "UN",
2347
		(unsigned int)msr & 0xF,
2348
		pkg_cstate_limit_strings[pkg_cstate_limit]);
2349 2350 2351 2352 2353 2354 2355 2356 2357

#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
	if (has_automatic_cstate_conversion) {
		fprintf(outf, ", automatic c-state conversion=%s",
			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
	}

	fprintf(outf, ")\n");

2358
	return;
L
Len Brown 已提交
2359 2360
}

2361 2362 2363 2364 2365 2366
static void
dump_config_tdp(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2367
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2368
	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2369 2370

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2371
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2372
	if (msr) {
2373 2374 2375 2376
		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2377
	}
2378
	fprintf(outf, ")\n");
2379 2380

	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2381
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2382
	if (msr) {
2383 2384 2385 2386
		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2387
	}
2388
	fprintf(outf, ")\n");
2389 2390

	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2391
	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2392
	if ((msr) & 0x3)
2393 2394 2395
		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
2396

2397
	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2398
	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2399
	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2400 2401
	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
	fprintf(outf, ")\n");
2402
}
2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443

/*
 * Multiplier, in ns, for each encoding of the 3-bit IRTL time-unit
 * field.  The last two encodings have no unit and map to 0.
 */
unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

/*
 * print_irtl()
 * Print the package C-state interrupt response time limit MSRs as read
 * on base_cpu: PKGC3/6/7 always, and PKGC8/9/10 only when do_irtl_hsw
 * is set.
 *
 * For each MSR: bit 15 is reported as the valid flag, bits 9:0 are the
 * time value and bits 12:10 select the unit from irtl_time_units[].
 * The unit index is masked with 0x7 so that all eight table entries,
 * including the 1048576 ns and 33554432 ns units, are reachable.
 */
void print_irtl(void)
{
	unsigned long long msr;

	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

	if (!do_irtl_hsw)
		return;

	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);

}

void free_fd_percpu(void)
{
	int i;

2448
	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2449 2450 2451 2452 2453
		if (fd_percpu[i] != 0)
			close(fd_percpu[i]);
	}

	free(fd_percpu);
2454 2455
}

2456
void free_all_buffers(void)
L
Len Brown 已提交
2457
{
2458 2459
	int i;

2460 2461
	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
2462
	cpu_present_setsize = 0;
L
Len Brown 已提交
2463

2464 2465 2466
	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;
L
Len Brown 已提交
2467

2468 2469 2470
	free(thread_even);
	free(core_even);
	free(package_even);
L
Len Brown 已提交
2471

2472 2473 2474
	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;
L
Len Brown 已提交
2475

2476 2477 2478
	free(thread_odd);
	free(core_odd);
	free(package_odd);
L
Len Brown 已提交
2479

2480 2481 2482
	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;
L
Len Brown 已提交
2483

2484 2485 2486
	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;
2487 2488

	free_fd_percpu();
2489 2490 2491

	free(irq_column_2_cpu);
	free(irqs_per_cpu);
2492 2493 2494 2495 2496 2497

	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpus[i].put_ids)
			CPU_FREE(cpus[i].put_ids);
	}
	free(cpus);
L
Len Brown 已提交
2498 2499
}

2500

2501
/*
 * Parse a file containing a single int.
 * The file name is built printf-style from fmt and the following
 * arguments (truncated to PATH_MAX - 1 characters).
 * Return 0 if file can not be opened
 * Exit if file can be opened, but can not be parsed
 * Otherwise return the first decimal integer found in the file.
 */
int parse_int_file(const char *fmt, ...)
{
	va_list args;
	char path[PATH_MAX];
	FILE *filep;
	int value;

	va_start(args, fmt);
	vsnprintf(path, sizeof(path), fmt, args);
	va_end(args);
	filep = fopen(path, "r");
	if (!filep)
		return 0;
	if (fscanf(filep, "%d", &value) != 1)
		err(1, "%s: failed to parse number from file", path);
	fclose(filep);
	return value;
}

/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 *
 * The test is whether @cpu equals the first integer in its sysfs
 * topology/core_siblings_list file.  If that file cannot be opened,
 * parse_int_file() yields 0, so the result is 1 only for cpu 0.
 */
int cpu_is_first_core_in_package(int cpu)
{
	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
}

/*
 * get_physical_package_id(cpu)
 * Return the integer read from @cpu's sysfs
 * topology/physical_package_id file (0 if it cannot be opened).
 */
int get_physical_package_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
}

/*
 * get_die_id(cpu)
 * Return the integer read from @cpu's sysfs topology/die_id file
 * (0 if it cannot be opened).
 */
int get_die_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
}

/*
 * get_core_id(cpu)
 * Return the integer read from @cpu's sysfs topology/core_id file
 * (0 if it cannot be opened).
 */
int get_core_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
}

void set_node_data(void)
{
2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586
	int pkg, node, lnode, cpu, cpux;
	int cpu_count;

	/* initialize logical_node_id */
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
		cpus[cpu].logical_node_id = -1;

	cpu_count = 0;
	for (pkg = 0; pkg < topo.num_packages; pkg++) {
		lnode = 0;
		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
			if (cpus[cpu].physical_package_id != pkg)
				continue;
			/* find a cpu with an unset logical_node_id */
			if (cpus[cpu].logical_node_id != -1)
				continue;
			cpus[cpu].logical_node_id = lnode;
			node = cpus[cpu].physical_node_id;
			cpu_count++;
			/*
			 * find all matching cpus on this pkg and set
			 * the logical_node_id
			 */
			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
				if ((cpus[cpux].physical_package_id == pkg) &&
				   (cpus[cpux].physical_node_id == node)) {
					cpus[cpux].logical_node_id = lnode;
					cpu_count++;
				}
			}
			lnode++;
			if (lnode > topo.nodes_per_pkg)
				topo.nodes_per_pkg = lnode;
		}
		if (cpu_count >= topo.max_cpu_num)
			break;
2587 2588 2589 2590
	}
}

int get_physical_node_id(struct cpu_topology *thiscpu)
2591 2592 2593
{
	char path[80];
	FILE *filep;
2594 2595
	int i;
	int cpu = thiscpu->logical_cpu_id;
2596

2597 2598 2599 2600 2601 2602 2603 2604
	for (i = 0; i <= topo.max_cpu_num; i++) {
		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
			cpu, i);
		filep = fopen(path, "r");
		if (!filep)
			continue;
		fclose(filep);
		return i;
2605
	}
2606 2607
	return -1;
}
2608

2609 2610 2611 2612 2613
int get_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
	unsigned long map;
2614
	int so, shift, sib_core;
2615 2616 2617
	int cpu = thiscpu->logical_cpu_id;
	int offset = topo.max_cpu_num + 1;
	size_t size;
2618
	int thread_id = 0;
2619 2620

	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2621 2622
	if (thiscpu->thread_id < 0)
		thiscpu->thread_id = thread_id++;
2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633
	if (!thiscpu->put_ids)
		return -1;

	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(size, thiscpu->put_ids);

	sprintf(path,
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
	filep = fopen_or_die(path, "r");
	do {
		offset -= BITMASK_SIZE;
2634 2635
		if (fscanf(filep, "%lx%c", &map, &character) != 2)
			err(1, "%s: failed to parse file", path);
2636 2637
		for (shift = 0; shift < BITMASK_SIZE; shift++) {
			if ((map >> shift) & 0x1) {
2638 2639 2640 2641 2642 2643 2644 2645 2646
				so = shift + offset;
				sib_core = get_core_id(so);
				if (sib_core == thiscpu->physical_core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) &&
					    (cpus[so].thread_id < 0))
						cpus[so].thread_id =
								    thread_id++;
				}
2647 2648 2649
			}
		}
	} while (!strncmp(&character, ",", 1));
2650
	fclose(filep);
2651 2652

	return CPU_COUNT_S(size, thiscpu->put_ids);
2653 2654
}

L
Len Brown 已提交
2655
/*
2656 2657
 * run func(thread, core, package) in topology order
 * skip non-present cpus
L
Len Brown 已提交
2658 2659
 */

2660 2661 2662 2663 2664 2665 2666
int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
	struct pkg_data *, struct thread_data *, struct core_data *,
	struct pkg_data *), struct thread_data *thread_base,
	struct core_data *core_base, struct pkg_data *pkg_base,
	struct thread_data *thread_base2, struct core_data *core_base2,
	struct pkg_data *pkg_base2)
{
2667
	int retval, pkg_no, node_no, core_no, thread_no;
2668 2669

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node;
			     ++core_no) {
				for (thread_no = 0; thread_no <
					topo.threads_per_core; ++thread_no) {
					struct thread_data *t, *t2;
					struct core_data *c, *c2;
					struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no,
						       core_no, node_no,
						       pkg_no);

					if (cpu_is_not_present(t->cpu_id))
						continue;

					t2 = GET_THREAD(thread_base2, thread_no,
							core_no, node_no,
							pkg_no);

					c = GET_CORE(core_base, core_no,
						     node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no,
						      node_no,
						      pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					retval = func(t, c, p, t2, c2, p2);
					if (retval)
						return retval;
				}
2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713
			}
		}
	}
	return 0;
}

/*
 * run func(cpu) on every cpu in /proc/stat
 * return max_cpu number
 */
int for_all_proc_cpus(int (func)(int))
L
Len Brown 已提交
2714 2715
{
	FILE *fp;
2716
	int cpu_num;
L
Len Brown 已提交
2717 2718
	int retval;

2719
	fp = fopen_or_die(proc_stat, "r");
L
Len Brown 已提交
2720 2721

	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2722 2723
	if (retval != 0)
		err(1, "%s: failed to parse format", proc_stat);
L
Len Brown 已提交
2724

2725 2726
	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
L
Len Brown 已提交
2727 2728 2729
		if (retval != 1)
			break;

2730 2731 2732 2733 2734
		retval = func(cpu_num);
		if (retval) {
			fclose(fp);
			return(retval);
		}
L
Len Brown 已提交
2735 2736
	}
	fclose(fp);
2737
	return 0;
L
Len Brown 已提交
2738 2739 2740 2741
}

/*
 * re_initialize()
 * Free every buffer, set them all up again, then report the cpu count
 * now recorded in topo on stdout.
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();

	fprintf(stdout, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}

2747 2748 2749 2750 2751 2752 2753 2754 2755 2756
void set_max_cpu_num(void)
{
	FILE *filep;
	unsigned long dummy;

	topo.max_cpu_num = 0;
	filep = fopen_or_die(
			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
			"r");
	while (fscanf(filep, "%lx,", &dummy) == 1)
2757
		topo.max_cpu_num += BITMASK_SIZE;
2758 2759 2760
	fclose(filep);
	topo.max_cpu_num--; /* 0 based */
}
2761

L
Len Brown 已提交
2762
/*
2763 2764
 * count_cpus()
 * remember the last one seen, it will be the max
L
Len Brown 已提交
2765
 */
2766
int count_cpus(int cpu)
L
Len Brown 已提交
2767
{
2768
	topo.num_cpus++;
2769 2770 2771 2772 2773
	return 0;
}
/*
 * mark_cpu_present()
 * Set the bit for cpu in cpu_present_set.  Always returns 0.
 */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);

	return 0;
}

2777 2778 2779 2780 2781 2782
/*
 * init_thread_id()
 * Set the thread_id stored for cpu in the cpus[] table to -1.
 * Always returns 0.
 */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}

2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845
/*
 * snapshot_proc_interrupts()
 *
 * read and record summary of /proc/interrupts
 *
 * The file is opened once and kept open; later calls rewind it.
 * The header line maps each column to a cpu number (irq_column_2_cpu[])
 * and clears that cpu's total; every following line adds its per-column
 * counts into irqs_per_cpu[].
 *
 * return 1 if a cpu number above topo.max_cpu_num shows up in the header
 * (config change requires a restart), else return 0
 */
int snapshot_proc_interrupts(void)
{
	static FILE *fp;
	int column, retval;

	if (fp == NULL)
		fp = fopen_or_die("/proc/interrupts", "r");
	else
		rewind(fp);

	/* read 1st line of /proc/interrupts to get cpu* name for each column */
	for (column = 0; column < topo.num_cpus; ++column) {
		int cpu_number;

		retval = fscanf(fp, " CPU%d", &cpu_number);
		if (retval != 1)
			break;

		if (cpu_number > topo.max_cpu_num) {
			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
			return 1;
		}

		irq_column_2_cpu[column] = cpu_number;
		irqs_per_cpu[cpu_number] = 0;
	}

	/* read /proc/interrupt count lines and sum up irqs per cpu */
	while (1) {
		char buf[64];
		int c;

		/* flush irq# "N:"; the width keeps a long token inside buf */
		retval = fscanf(fp, " %63s:", buf);
		if (retval != 1)
			break;

		/* read the count per cpu */
		for (column = 0; column < topo.num_cpus; ++column) {
			int cpu_number, irq_count;

			retval = fscanf(fp, " %d", &irq_count);
			if (retval != 1)
				break;

			cpu_number = irq_column_2_cpu[column];
			irqs_per_cpu[cpu_number] += irq_count;
		}

		/* flush interrupt description; stop at EOF instead of spinning */
		do {
			c = getc(fp);
		} while (c != '\n' && c != EOF);

		if (c == EOF)
			break;
	}
	return 0;
}
L
Len Brown 已提交
2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868
/*
 * snapshot_gfx_rc6_ms()
 *
 * record snapshot of
 * /sys/class/drm/card0/power/rc6_residency_ms
 * into gfx_cur_rc6_ms
 *
 * Exits via err() if no number can be parsed; otherwise returns 0.
 */
int snapshot_gfx_rc6_ms(void)
{
	FILE *rc6_file;

	rc6_file = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");

	if (fscanf(rc6_file, "%lld", &gfx_cur_rc6_ms) != 1)
		err(1, "GFX rc6");

	fclose(rc6_file);
	return 0;
}
L
Len Brown 已提交
2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883
/*
 * snapshot_gfx_mhz()
 *
 * record snapshot of
 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_gfx_mhz(void)
{
	static FILE *fp;
	int retval;

	if (fp == NULL)
		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2884
	else {
L
Len Brown 已提交
2885
		rewind(fp);
2886 2887
		fflush(fp);
	}
L
Len Brown 已提交
2888 2889 2890 2891 2892 2893 2894

	retval = fscanf(fp, "%d", &gfx_cur_mhz);
	if (retval != 1)
		err(1, "GFX MHz");

	return 0;
}
2895

2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911
/*
 * snapshot_cpu_lpi()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_cpu_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2912 2913 2914 2915 2916
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
		BIC_NOT_PRESENT(BIC_CPU_LPI);
		return -1;
	}
2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937

	fclose(fp);

	return 0;
}
/*
 * snapshot_sys_lpi()
 *
 * record snapshot of
 * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_sys_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2938 2939 2940 2941 2942
	if (retval != 1) {
		fprintf(stderr, "Disabling Low Power Idle System output\n");
		BIC_NOT_PRESENT(BIC_SYS_LPI);
		return -1;
	}
2943 2944 2945 2946
	fclose(fp);

	return 0;
}
2947 2948 2949 2950 2951 2952 2953
/*
 * snapshot /proc and /sys files
 *
 * return 1 if configuration restart needed, else return 0
 */
int snapshot_proc_sysfs_files(void)
{
2954 2955 2956
	if (DO_BIC(BIC_IRQ))
		if (snapshot_proc_interrupts())
			return 1;
2957

2958
	if (DO_BIC(BIC_GFX_rc6))
L
Len Brown 已提交
2959 2960
		snapshot_gfx_rc6_ms();

2961
	if (DO_BIC(BIC_GFXMHz))
L
Len Brown 已提交
2962 2963
		snapshot_gfx_mhz();

2964 2965 2966 2967 2968 2969
	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

2970 2971 2972
	return 0;
}

2973 2974 2975 2976 2977 2978 2979 2980 2981 2982
int exit_requested;

static void signal_handler (int signal)
{
	switch (signal) {
	case SIGINT:
		exit_requested = 1;
		if (debug)
			fprintf(stderr, " SIGINT\n");
		break;
2983 2984 2985 2986
	case SIGUSR1:
		if (debug > 1)
			fprintf(stderr, "SIGUSR1\n");
		break;
2987
	}
2988 2989
	/* make sure this manually-invoked interval is at least 1ms long */
	nanosleep(&one_msec, NULL);
2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001
}

void setup_signal_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));

	sa.sa_handler = &signal_handler;

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(1, "sigaction SIGINT");
3002 3003
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		err(1, "sigaction SIGUSR1");
3004
}
3005

3006
void do_sleep(void)
3007 3008 3009 3010 3011 3012 3013 3014
{
	struct timeval select_timeout;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

3015 3016 3017 3018
	if (!isatty(fileno(stdin))) {
		nanosleep(&interval_ts, NULL);
		return;
	}
3019

3020
	select_timeout = interval_tv;
3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032
	retval = select(1, &readfds, NULL, NULL, &select_timeout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		}
		/* make sure this manually-invoked interval is at least 1ms long */
		nanosleep(&one_msec, NULL);
	}
}
3033

3034

L
Len Brown 已提交
3035 3036
void turbostat_loop()
{
3037
	int retval;
3038
	int restarted = 0;
3039
	int done_iters = 0;
3040

3041 3042
	setup_signal_handler();

L
Len Brown 已提交
3043
restart:
3044 3045
	restarted++;

3046
	snapshot_proc_sysfs_files();
3047
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3048
	first_counter_read = 0;
3049 3050 3051
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
3052 3053 3054
		if (restarted > 1) {
			exit(retval);
		}
3055 3056 3057
		re_initialize();
		goto restart;
	}
3058
	restarted = 0;
3059
	done_iters = 0;
L
Len Brown 已提交
3060 3061 3062
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
3063
		if (for_all_proc_cpus(cpu_is_not_present)) {
L
Len Brown 已提交
3064 3065 3066
			re_initialize();
			goto restart;
		}
3067
		do_sleep();
3068 3069
		if (snapshot_proc_sysfs_files())
			goto restart;
3070
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
3071 3072 3073
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
3074 3075 3076
			re_initialize();
			goto restart;
		}
L
Len Brown 已提交
3077 3078
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
3079 3080 3081 3082
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
3083 3084
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
3085
		flush_output_stdout();
3086 3087
		if (exit_requested)
			break;
3088 3089
		if (num_iterations && ++done_iters >= num_iterations)
			break;
3090
		do_sleep();
3091 3092
		if (snapshot_proc_sysfs_files())
			goto restart;
3093
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3094 3095 3096
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
L
Len Brown 已提交
3097 3098 3099 3100 3101
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
3102 3103 3104 3105
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
3106 3107
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
3108
		flush_output_stdout();
3109 3110
		if (exit_requested)
			break;
3111 3112
		if (num_iterations && ++done_iters >= num_iterations)
			break;
L
Len Brown 已提交
3113 3114 3115 3116 3117 3118
	}
}

void check_dev_msr()
{
	struct stat sb;
3119
	char pathname[32];
L
Len Brown 已提交
3120

3121 3122
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (stat(pathname, &sb))
3123 3124
 		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
L
Len Brown 已提交
3125 3126
}

3127
void check_permissions()
L
Len Brown 已提交
3128
{
3129 3130 3131 3132 3133 3134
	struct __user_cap_header_struct cap_header_data;
	cap_user_header_t cap_header = &cap_header_data;
	struct __user_cap_data_struct cap_data_data;
	cap_user_data_t cap_data = &cap_data_data;
	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
	int do_exit = 0;
3135
	char pathname[32];
3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149

	/* check for CAP_SYS_RAWIO */
	cap_header->pid = getpid();
	cap_header->version = _LINUX_CAPABILITY_VERSION;
	if (capget(cap_header, cap_data) < 0)
		err(-6, "capget(2) failed");

	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
		do_exit++;
		warnx("capget(CAP_SYS_RAWIO) failed,"
			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
	}

	/* test file permissions */
3150 3151
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (euidaccess(pathname, R_OK)) {
3152 3153 3154 3155 3156 3157 3158
		do_exit++;
		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
	}

	/* if all else fails, thell them to be root */
	if (do_exit)
		if (getuid() != 0)
3159
			warnx("... or simply run as root");
3160 3161 3162

	if (do_exit)
		exit(-6);
L
Len Brown 已提交
3163 3164
}

3165 3166 3167 3168 3169
/*
 * NHM adds support for additional MSRs:
 *
 * MSR_SMI_COUNT                   0x00000034
 *
3170
 * MSR_PLATFORM_INFO               0x000000ce
3171
 * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3172
 *
3173 3174
 * MSR_MISC_PWR_MGMT               0x000001aa
 *
3175 3176 3177 3178 3179
 * MSR_PKG_C3_RESIDENCY            0x000003f8
 * MSR_PKG_C6_RESIDENCY            0x000003f9
 * MSR_CORE_C3_RESIDENCY           0x000003fc
 * MSR_CORE_C6_RESIDENCY           0x000003fd
 *
3180
 * Side effect:
3181
 * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3182
 * sets has_misc_feature_control
3183
 */
3184
int probe_nhm_msrs(unsigned int family, unsigned int model)
L
Len Brown 已提交
3185
{
3186
	unsigned long long msr;
3187
	unsigned int base_ratio;
3188 3189
	int *pkg_cstate_limits;

L
Len Brown 已提交
3190 3191 3192 3193 3194 3195
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

3196 3197
	bclk = discover_bclk(family, model);

L
Len Brown 已提交
3198
	switch (model) {
3199 3200
	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3201 3202
		pkg_cstate_limits = nhm_pkg_cstate_limits;
		break;
3203 3204 3205 3206
	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3207
		pkg_cstate_limits = snb_pkg_cstate_limits;
3208
		has_misc_feature_control = 1;
3209
		break;
3210
	case INTEL_FAM6_HASWELL:	/* HSW */
3211 3212
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3213
	case INTEL_FAM6_BROADWELL:	/* BDW */
3214 3215 3216
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3217
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3218
		pkg_cstate_limits = hsw_pkg_cstate_limits;
3219
		has_misc_feature_control = 1;
3220
		break;
3221 3222
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
		pkg_cstate_limits = skx_pkg_cstate_limits;
3223
		has_misc_feature_control = 1;
3224
		break;
3225
	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
3226
		no_MSR_MISC_PWR_MGMT = 1;
3227
	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
3228 3229
		pkg_cstate_limits = slv_pkg_cstate_limits;
		break;
3230
	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
3231
		pkg_cstate_limits = amt_pkg_cstate_limits;
3232
		no_MSR_MISC_PWR_MGMT = 1;
3233
		break;
3234
	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
3235 3236
		pkg_cstate_limits = phi_pkg_cstate_limits;
		break;
3237
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3238 3239
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
3240
		pkg_cstate_limits = glm_pkg_cstate_limits;
3241
		break;
L
Len Brown 已提交
3242 3243 3244
	default:
		return 0;
	}
3245
	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3246
	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3247

3248
	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3249 3250 3251 3252
	base_ratio = (msr >> 8) & 0xFF;

	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;
3253
	return 1;
L
Len Brown 已提交
3254
}
3255
/*
3256
 * SLV client has support for unique MSRs:
3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267
 *
 * MSR_CC6_DEMOTION_POLICY_CONFIG
 * MSR_MC6_DEMOTION_POLICY_CONFIG
 */

int has_slv_msrs(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
3268 3269 3270
	case INTEL_FAM6_ATOM_SILVERMONT:
	case INTEL_FAM6_ATOM_SILVERMONT_MID:
	case INTEL_FAM6_ATOM_AIRMONT_MID:
3271 3272 3273 3274
		return 1;
	}
	return 0;
}
3275 3276 3277 3278 3279 3280 3281
int is_dnv(unsigned int family, unsigned int model)
{

	if (!genuine_intel)
		return 0;

	switch (model) {
3282
	case INTEL_FAM6_ATOM_GOLDMONT_X:
3283 3284 3285 3286
		return 1;
	}
	return 0;
}
3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298
/*
 * is_bdx()
 * Returns 1 on a genuine-Intel INTEL_FAM6_BROADWELL_X model, else 0.
 * The family argument is not examined.
 */
int is_bdx(unsigned int family, unsigned int model)
{
	if (genuine_intel && model == INTEL_FAM6_BROADWELL_X)
		return 1;

	return 0;
}
3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310
/*
 * is_skx()
 * Returns 1 on a genuine-Intel INTEL_FAM6_SKYLAKE_X model, else 0.
 * The family argument is not examined.
 */
int is_skx(unsigned int family, unsigned int model)
{
	if (genuine_intel && model == INTEL_FAM6_SKYLAKE_X)
		return 1;

	return 0;
}
3311

3312
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3313
{
3314 3315 3316
	if (has_slv_msrs(family, model))
		return 0;

3317 3318
	switch (model) {
	/* Nehalem compatible, but do not include turbo-ratio limit support */
3319 3320
	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3321 3322 3323 3324 3325
		return 0;
	default:
		return 1;
	}
}
3326 3327 3328 3329 3330 3331 3332
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports true, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3333 3334 3335 3336 3337 3338 3339 3340 3341
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3342 3343
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357
		return 1;
	default:
		return 0;
	}
}
int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3358
	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3359 3360 3361 3362 3363 3364
		return 1;
	default:
		return 0;
	}
}

3365 3366 3367 3368 3369 3370 3371 3372 3373
int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3374
	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3375 3376 3377 3378 3379
		return 1;
	default:
		return 0;
	}
}
3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395
/*
 * has_glm_turbo_ratio_limit()
 * Returns 1 on genuine-Intel family 6 INTEL_FAM6_ATOM_GOLDMONT and
 * INTEL_FAM6_SKYLAKE_X, else 0.
 */
int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel || family != 6)
		return 0;

	if (model == INTEL_FAM6_ATOM_GOLDMONT || model == INTEL_FAM6_SKYLAKE_X)
		return 1;

	return 0;
}
3396 3397 3398 3399 3400 3401 3402 3403 3404
int has_config_tdp(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
3405
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3406
	case INTEL_FAM6_HASWELL:	/* HSW */
3407 3408
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3409
	case INTEL_FAM6_BROADWELL:	/* BDW */
3410 3411 3412
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3413
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3414 3415 3416
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */

	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3417 3418 3419 3420 3421 3422
		return 1;
	default:
		return 0;
	}
}

3423
static void
3424
dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436
{
	if (!do_nhm_platform_info)
		return;

	dump_nhm_platform_info();

	if (has_hsw_turbo_ratio_limit(family, model))
		dump_hsw_turbo_ratio_limits();

	if (has_ivt_turbo_ratio_limit(family, model))
		dump_ivt_turbo_ratio_limits();

3437 3438
	if (has_turbo_ratio_limit(family, model))
		dump_turbo_ratio_limits(family, model);
3439

3440 3441 3442
	if (has_atom_turbo_ratio_limit(family, model))
		dump_atom_turbo_ratio_limits();

3443 3444 3445
	if (has_knl_turbo_ratio_limit(family, model))
		dump_knl_turbo_ratio_limits();

3446 3447 3448
	if (has_config_tdp(family, model))
		dump_config_tdp();

3449 3450 3451
	dump_nhm_cst_cfg();
}

3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471
static void
dump_sysfs_cstate_config(void)
{
	char path[64];
	char name_buf[16];
	char desc[64];
	FILE *input;
	int state;
	char *sp;

	if (!DO_BIC(BIC_sysfs))
		return;

	for (state = 0; state < 10; ++state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
3472 3473
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);
3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486

		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
3487 3488
		if (!fgets(desc, sizeof(desc), input))
			err(1, "%s: failed to read file", path);
3489 3490 3491 3492 3493

		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
		fclose(input);
	}
}
3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506
/*
 * dump_sysfs_pstate_config()
 * Print base_cpu's cpufreq scaling driver and governor to outf, followed
 * by the cpufreq boost and intel_pstate no_turbo settings when those
 * files exist.  If the driver or governor file cannot be opened, an
 * "NSFOD <path>" line is printed and nothing further is reported.
 * A read or parse failure on an opened file exits via err().
 */
static void
dump_sysfs_pstate_config(void)
{
	char path[64];
	char driver_buf[64];
	char governor_buf[64];
	FILE *input;
	int turbo;

	/* scaling driver */
	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
			base_cpu);
	input = fopen(path, "r");
	if (!input) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (fgets(driver_buf, sizeof(driver_buf), input) == NULL)
		err(1, "%s: failed to read file", path);
	fclose(input);

	/* scaling governor */
	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
			base_cpu);
	input = fopen(path, "r");
	if (!input) {
		fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (fgets(governor_buf, sizeof(governor_buf), input) == NULL)
		err(1, "%s: failed to read file", path);
	fclose(input);

	/* both buffers are printed as read, without appending a newline */
	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);

	/* optional: generic cpufreq boost switch */
	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
	input = fopen(path, "r");
	if (input) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq boost: %d\n", turbo);
		fclose(input);
	}

	/* optional: intel_pstate turbo switch */
	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
	input = fopen(path, "r");
	if (input) {
		if (fscanf(input, "%d", &turbo) != 1)
			err(1, "%s: failed to parse number from file", path);
		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
		fclose(input);
	}
}
3546

3547

3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567
/*
 * print_epb()
 * Decode the ENERGY_PERF_BIAS MSR
 */
int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	char *epb_string;
	int cpu;

	if (!has_epb)
		return 0;

	cpu = t->cpu_id;

	/* EPB is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
3568
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3569 3570 3571 3572 3573 3574
		return -1;
	}

	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
		return 0;

3575
	switch (msr & 0xF) {
3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588
	case ENERGY_PERF_BIAS_PERFORMANCE:
		epb_string = "performance";
		break;
	case ENERGY_PERF_BIAS_NORMAL:
		epb_string = "balanced";
		break;
	case ENERGY_PERF_BIAS_POWERSAVE:
		epb_string = "powersave";
		break;
	default:
		epb_string = "custom";
		break;
	}
3589
	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3590 3591 3592

	return 0;
}
3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611
/*
 * print_hwp()
 * Decode the MSR_HWP_CAPABILITIES
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
3612
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3613 3614 3615 3616 3617 3618
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

3619
	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3620 3621 3622 3623 3624 3625 3626 3627 3628
		cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

3629
	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3630
			"(high %d guar %d eff %d low %d)\n",
3631 3632 3633 3634 3635 3636 3637 3638 3639
			cpu, msr,
			(unsigned int)HWP_HIGHEST_PERF(msr),
			(unsigned int)HWP_GUARANTEED_PERF(msr),
			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
			(unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

3640
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3641
			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0xff3),
			(unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

3654
		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3655
			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0xff3));
	}
	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

3667
		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3668 3669 3670 3671 3672 3673 3674 3675
			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
			cpu, msr,
			((msr) & 0x1) ? "EN" : "Dis",
			((msr) & 0x2) ? "EN" : "Dis");
	}
	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

3676
	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3677 3678 3679 3680
			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
			cpu, msr,
			((msr) & 0x1) ? "" : "No-",
			((msr) & 0x2) ? "" : "No-");
3681 3682 3683 3684

	return 0;
}

3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699
/*
 * print_perf_limit()
 * Decode the core, graphics and ring PERF_LIMIT_REASONS MSRs, each one
 * only when its do_*_perf_limit_reasons flag is set.  In every MSR the
 * low 16 bits are printed as "Active" reasons and the same reasons,
 * 16 bits higher, as "Logged".
 *
 * Only acts for the first thread of the first core of a package.
 * Returns -1 if migrating to the cpu fails, otherwise 0.
 *
 * All bit tests use 1ULL: "1 << 31" overflows a signed int and, once
 * widened to match the 64-bit msr, would test bits 31..63 together.
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	cpu = t->cpu_id;

	/* per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (do_core_perf_limit_reasons) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
			(msr & (1ULL << 15)) ? "bit15, " : "",
			(msr & (1ULL << 14)) ? "bit14, " : "",
			(msr & (1ULL << 13)) ? "Transitions, " : "",
			(msr & (1ULL << 12)) ? "MultiCoreTurbo, " : "",
			(msr & (1ULL << 11)) ? "PkgPwrL2, " : "",
			(msr & (1ULL << 10)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 9)) ? "CorePwr, " : "",
			(msr & (1ULL << 8)) ? "Amps, " : "",
			(msr & (1ULL << 6)) ? "VR-Therm, " : "",
			(msr & (1ULL << 5)) ? "Auto-HWP, " : "",
			(msr & (1ULL << 4)) ? "Graphics, " : "",
			(msr & (1ULL << 2)) ? "bit2, " : "",
			(msr & (1ULL << 1)) ? "ThermStatus, " : "",
			(msr & (1ULL << 0)) ? "PROCHOT, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
			(msr & (1ULL << 31)) ? "bit31, " : "",
			(msr & (1ULL << 30)) ? "bit30, " : "",
			(msr & (1ULL << 29)) ? "Transitions, " : "",
			(msr & (1ULL << 28)) ? "MultiCoreTurbo, " : "",
			(msr & (1ULL << 27)) ? "PkgPwrL2, " : "",
			(msr & (1ULL << 26)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 25)) ? "CorePwr, " : "",
			(msr & (1ULL << 24)) ? "Amps, " : "",
			(msr & (1ULL << 22)) ? "VR-Therm, " : "",
			(msr & (1ULL << 21)) ? "Auto-HWP, " : "",
			(msr & (1ULL << 20)) ? "Graphics, " : "",
			(msr & (1ULL << 18)) ? "bit18, " : "",
			(msr & (1ULL << 17)) ? "ThermStatus, " : "",
			(msr & (1ULL << 16)) ? "PROCHOT, " : "");

	}
	if (do_gfx_perf_limit_reasons) {
		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
			(msr & (1ULL << 0)) ? "PROCHOT, " : "",
			(msr & (1ULL << 1)) ? "ThermStatus, " : "",
			(msr & (1ULL << 4)) ? "Graphics, " : "",
			(msr & (1ULL << 6)) ? "VR-Therm, " : "",
			(msr & (1ULL << 8)) ? "Amps, " : "",
			(msr & (1ULL << 9)) ? "GFXPwr, " : "",
			(msr & (1ULL << 10)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 11)) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
			(msr & (1ULL << 16)) ? "PROCHOT, " : "",
			(msr & (1ULL << 17)) ? "ThermStatus, " : "",
			(msr & (1ULL << 20)) ? "Graphics, " : "",
			(msr & (1ULL << 22)) ? "VR-Therm, " : "",
			(msr & (1ULL << 24)) ? "Amps, " : "",
			(msr & (1ULL << 25)) ? "GFXPwr, " : "",
			(msr & (1ULL << 26)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 27)) ? "PkgPwrL2, " : "");
	}
	if (do_ring_perf_limit_reasons) {
		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s)",
			(msr & (1ULL << 0)) ? "PROCHOT, " : "",
			(msr & (1ULL << 1)) ? "ThermStatus, " : "",
			(msr & (1ULL << 6)) ? "VR-Therm, " : "",
			(msr & (1ULL << 8)) ? "Amps, " : "",
			(msr & (1ULL << 10)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 11)) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
			(msr & (1ULL << 16)) ? "PROCHOT, " : "",
			(msr & (1ULL << 17)) ? "ThermStatus, " : "",
			(msr & (1ULL << 22)) ? "VR-Therm, " : "",
			(msr & (1ULL << 24)) ? "Amps, " : "",
			(msr & (1ULL << 26)) ? "PkgPwrL1, " : "",
			(msr & (1ULL << 27)) ? "PkgPwrL2, " : "");
	}
	return 0;
}

3782 3783 3784
/*
 * Field-width masks for RAPL MSR values.  get_tdp_intel() applies
 * RAPL_POWER_GRANULARITY to the low bits of MSR_PKG_POWER_INFO.
 */
#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
#define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */

3785
double get_tdp_intel(unsigned int model)
3786 3787 3788 3789
{
	unsigned long long msr;

	if (do_rapl & RAPL_PKG_POWER_INFO)
3790
		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3791 3792 3793
			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;

	switch (model) {
3794 3795
	case INTEL_FAM6_ATOM_SILVERMONT:
	case INTEL_FAM6_ATOM_SILVERMONT_X:
3796 3797 3798 3799 3800 3801
		return 30.0;
	default:
		return 135.0;
	}
}

3802 3803 3804 3805 3806 3807 3808 3809 3810 3811
/*
 * get_tdp_amd()
 * Return the TDP assumed for an AMD part.  Family 0x17 and every other
 * family currently share the same figure.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h chips */
	const double fam17h_max_tdp = 250.0;

	if (family == 0x17)
		return fam17h_max_tdp;

	/* no per-family data for anything else; reuse the Fam17h figure */
	return fam17h_max_tdp;
}

3812 3813 3814 3815 3816 3817 3818 3819 3820 3821
/*
 * rapl_dram_energy_units_probe()
 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
 */
static double
rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
{
	/* only called for genuine_intel, family 6 */

	switch (model) {
3822 3823 3824
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3825 3826 3827 3828 3829 3830
		return (rapl_dram_energy_units = 15.3 / 1000000);
	default:
		return (rapl_energy_units);
	}
}

3831
void rapl_probe_intel(unsigned int family, unsigned int model)
3832 3833
{
	unsigned long long msr;
3834
	unsigned int time_unit;
3835 3836 3837 3838 3839 3840
	double tdp;

	if (family != 6)
		return;

	switch (model) {
3841 3842
	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_IVYBRIDGE:
3843
	case INTEL_FAM6_HASWELL:	/* HSW */
3844
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3845
	case INTEL_FAM6_BROADWELL:	/* BDW */
3846
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3847
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3848 3849 3850 3851 3852 3853 3854 3855 3856
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_GFX_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_GFXWatt);
		}
3857
		break;
3858
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3859
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3860
		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3861 3862 3863 3864
		if (rapl_joules)
			BIC_PRESENT(BIC_Pkg_J);
		else
			BIC_PRESENT(BIC_PkgWatt);
3865
		break;
3866
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3867
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3868
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3869 3870 3871 3872 3873 3874
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
3875
			BIC_PRESENT(BIC_GFX_J);
3876 3877 3878 3879
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
3880
			BIC_PRESENT(BIC_GFXWatt);
3881
		}
3882
		break;
3883 3884 3885 3886
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3887
		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3888 3889 3890 3891 3892 3893 3894 3895 3896
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
L
Len Brown 已提交
3897
		break;
3898 3899
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE_X:
3900
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
3912
		break;
3913 3914
	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
3915
		do_rapl = RAPL_PKG | RAPL_CORES;
3916 3917 3918 3919 3920 3921 3922
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
		}
3923
		break;
3924
	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
3925
		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936
		BIC_PRESENT(BIC_PKG__);
		BIC_PRESENT(BIC_RAM__);
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
			BIC_PRESENT(BIC_RAM_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
			BIC_PRESENT(BIC_RAMWatt);
		}
3937
		break;
3938 3939 3940 3941 3942
	default:
		return;
	}

	/* units on package 0, verify later other packages match */
3943
	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3944 3945 3946
		return;

	rapl_power_units = 1.0 / (1 << (msr & 0xF));
3947
	if (model == INTEL_FAM6_ATOM_SILVERMONT)
3948 3949 3950
		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
	else
		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3951

3952 3953
	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);

3954 3955 3956
	time_unit = msr >> 16 & 0xF;
	if (time_unit == 0)
		time_unit = 0xA;
3957

3958
	rapl_time_units = 1.0 / (1 << (time_unit));
3959

3960
	tdp = get_tdp_intel(model);
3961

3962
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3963
	if (!quiet)
3964
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3965
}
3966

3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985
void rapl_probe_amd(unsigned int family, unsigned int model)
{
	unsigned long long msr;
	unsigned int eax, ebx, ecx, edx;
	unsigned int has_rapl = 0;
	double tdp;

	if (max_extended_level >= 0x80000007) {
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		/* RAPL (Fam 17h) */
		has_rapl = edx & (1 << 14);
	}

	if (!has_rapl)
		return;

	switch (family) {
	case 0x17: /* Zen, Zen+ */
		do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
3986 3987
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
3988
			BIC_PRESENT(BIC_Cor_J);
3989 3990
		} else {
			BIC_PRESENT(BIC_PkgWatt);
3991
			BIC_PRESENT(BIC_CorWatt);
3992
		}
3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022
		break;
	default:
		return;
	}

	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
		return;

	rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
	rapl_power_units = ldexp(1.0, -(msr & 0xf));

	tdp = get_tdp_amd(model);

	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}

/*
 * rapl_probe()
 *
 * Hand off to the vendor-specific probe, which
 * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
 */
void rapl_probe(unsigned int family, unsigned int model)
{
	if (genuine_intel) {
		rapl_probe_intel(family, model);
	}

	if (authentic_amd) {
		rapl_probe_amd(family, model);
	}
}

4025
void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4026 4027 4028 4029 4030 4031 4032 4033
{
	if (!genuine_intel)
		return;

	if (family != 6)
		return;

	switch (model) {
4034
	case INTEL_FAM6_HASWELL:	/* HSW */
4035
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
4036
		do_gfx_perf_limit_reasons = 1;
4037
	case INTEL_FAM6_HASWELL_X:	/* HSX */
4038 4039 4040 4041 4042 4043 4044
		do_core_perf_limit_reasons = 1;
		do_ring_perf_limit_reasons = 1;
	default:
		return;
	}
}

4045 4046 4047 4048 4049 4050
/*
 * automatic_cstate_conversion_probe()
 * Set has_automatic_cstate_conversion on SKX and BDX.
 */
void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
{
	if (!is_skx(family, model) && !is_bdx(family, model))
		return;

	has_automatic_cstate_conversion = 1;
}

4051 4052 4053
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
4054
	unsigned int dts, dts2;
4055 4056 4057 4058 4059 4060 4061 4062
	int cpu;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
4063
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4064 4065 4066
		return 0;

	if (cpu_migrate(cpu)) {
4067
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4068 4069 4070 4071 4072 4073 4074 4075
		return -1;
	}

	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
4076
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
4077 4078 4079 4080 4081 4082 4083
			cpu, msr, tcc_activation_temp - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
4084
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4085 4086 4087 4088
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
	}


4089
	if (do_dts && debug) {
4090 4091 4092 4093 4094 4095 4096
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;
4097
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4098 4099 4100 4101 4102 4103 4104
			cpu, msr, tcc_activation_temp - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
4105
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4106 4107 4108 4109 4110
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
	}

	return 0;
}
4111

4112 4113
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
4114
	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126
		cpu, label,
		((msr >> 15) & 1) ? "EN" : "DIS",
		((msr >> 0) & 0x7FFF) * rapl_power_units,
		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
		(((msr >> 16) & 1) ? "EN" : "DIS"));

	return;
}

int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
4127
	const char *msr_name;
4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138
	int cpu;

	if (!do_rapl)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
4139
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4140 4141 4142
		return -1;
	}

4143 4144 4145 4146 4147 4148 4149 4150 4151
	if (do_rapl & RAPL_AMD_F17H) {
		msr_name = "MSR_RAPL_PWR_UNIT";
		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
			return -1;
	} else {
		msr_name = "MSR_RAPL_POWER_UNIT";
		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
			return -1;
	}
4152

4153
	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4154 4155
		rapl_power_units, rapl_energy_units, rapl_time_units);

4156 4157
	if (do_rapl & RAPL_PKG_POWER_INFO) {

4158 4159 4160 4161
		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                	return -5;


4162
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4163 4164 4165 4166 4167 4168
			cpu, msr,
			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

4169 4170 4171
	}
	if (do_rapl & RAPL_PKG) {

4172 4173 4174
		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

4175
		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4176
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4177 4178

		print_power_limit_msr(cpu, msr, "PKG Limit #1");
4179
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4180 4181 4182 4183 4184 4185 4186
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");
	}

4187
	if (do_rapl & RAPL_DRAM_POWER_INFO) {
4188 4189 4190
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                	return -6;

4191
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4192 4193 4194 4195 4196
			cpu, msr,
			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4197 4198
	}
	if (do_rapl & RAPL_DRAM) {
4199 4200
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
4201
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4202
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4203 4204 4205

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
4206
	if (do_rapl & RAPL_CORE_POLICY) {
4207 4208
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;
4209

4210
		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4211
	}
4212
	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4213 4214 4215 4216 4217
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
4218 4219
	}
	if (do_rapl & RAPL_GFX) {
4220 4221
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;
4222

4223
		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4224

4225 4226 4227 4228 4229
		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
4230 4231 4232 4233
	}
	return 0;
}

4234 4235 4236 4237 4238 4239 4240
/*
 * SNB adds support for additional MSRs:
 *
 * MSR_PKG_C7_RESIDENCY            0x000003fa
 * MSR_CORE_C7_RESIDENCY           0x000003fe
 * MSR_PKG_C2_RESIDENCY            0x0000060d
 */
L
Len Brown 已提交
4241

4242
int has_snb_msrs(unsigned int family, unsigned int model)
L
Len Brown 已提交
4243 4244 4245 4246 4247
{
	if (!genuine_intel)
		return 0;

	switch (model) {
4248 4249 4250 4251
	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_SANDYBRIDGE_X:
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
4252
	case INTEL_FAM6_HASWELL:	/* HSW */
4253 4254
	case INTEL_FAM6_HASWELL_X:	/* HSW */
	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
4255
	case INTEL_FAM6_BROADWELL:	/* BDW */
4256 4257 4258
	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4259
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4260 4261
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4262 4263
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
L
Len Brown 已提交
4264 4265 4266 4267 4268
		return 1;
	}
	return 0;
}

4269 4270 4271
/*
 * HSW adds support for additional MSRs:
 *
4272 4273 4274 4275 4276 4277 4278 4279
 * MSR_PKG_C8_RESIDENCY		0x00000630
 * MSR_PKG_C9_RESIDENCY		0x00000631
 * MSR_PKG_C10_RESIDENCY	0x00000632
 *
 * MSR_PKGC8_IRTL		0x00000633
 * MSR_PKGC9_IRTL		0x00000634
 * MSR_PKGC10_IRTL		0x00000635
 *
4280 4281
 */
int has_hsw_msrs(unsigned int family, unsigned int model)
4282 4283 4284 4285 4286
{
	if (!genuine_intel)
		return 0;

	switch (model) {
4287 4288
	case INTEL_FAM6_HASWELL:
	case INTEL_FAM6_BROADWELL:	/* BDW */
4289
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4290
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4291
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4292
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311
		return 1;
	}
	return 0;
}

/*
 * SKL adds support for additional MSRS:
 *
 * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
 * MSR_PKG_ANY_CORE_C0_RES         0x00000659
 * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
 * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
 */
int has_skl_msrs(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
4312
	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4313
	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4314 4315 4316 4317 4318
		return 1;
	}
	return 0;
}

4319 4320 4321 4322 4323
int is_slm(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;
	switch (model) {
4324 4325
	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
	case INTEL_FAM6_ATOM_SILVERMONT_X:	/* AVN */
4326 4327 4328 4329 4330
		return 1;
	}
	return 0;
}

4331 4332 4333 4334 4335
int is_knl(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;
	switch (model) {
4336
	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4337 4338 4339 4340 4341
		return 1;
	}
	return 0;
}

4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354
/*
 * is_cnl()
 * Returns 1 on Intel family-6 Cannon Lake mobile, 0 otherwise.
 */
int is_cnl(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	/* the INTEL_FAM6_* model numbers are only meaningful within family 6 */
	if (family != 6)
		return 0;

	switch (model) {
	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
		return 1;
	}

	return 0;
}

4355 4356 4357 4358 4359 4360 4361
/*
 * get_aperf_mperf_multiplier()
 * Returns 1024 on KNL and 1 on every other processor.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}

4362 4363 4364 4365 4366 4367 4368 4369 4370
#define SLM_BCLK_FREQS 5
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};

double slm_bclk(void)
{
	unsigned long long msr = 3;
	unsigned int i;
	double freq;

4371
	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4372
		fprintf(outf, "SLM BCLK: unknown\n");
4373 4374 4375

	i = msr & 0xf;
	if (i >= SLM_BCLK_FREQS) {
4376
		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4377
		i = 3;
4378 4379 4380
	}
	freq = slm_freq_table[i];

4381
	if (!quiet)
4382
		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4383 4384 4385 4386

	return freq;
}

L
Len Brown 已提交
4387 4388
/*
 * discover_bclk()
 * Returns 100.00 for processors with the SNB MSRs and for KNL, the value
 * reported by slm_bclk() for Silvermont, and 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}

4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424
/*
 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
 * the Thermal Control Circuit (TCC) activates.
 * This is usually equal to tjMax.
 *
 * Older processors do not have this MSR, so there we guess,
 * but also allow cmdline over-ride with -T.
 *
 * Several MSR temperature values are in units of degrees-C
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int target_c_local;
	int cpu;

	/* tcc_activation_temp is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
4425
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4426 4427 4428 4429 4430
		return -1;
	}

	if (tcc_activation_temp_override != 0) {
		tcc_activation_temp = tcc_activation_temp_override;
4431
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4432 4433 4434 4435 4436
			cpu, tcc_activation_temp);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
4437
	if (!do_nhm_platform_info)
4438 4439
		goto guess;

4440
	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4441 4442
		goto guess;

4443
	target_c_local = (msr >> 16) & 0xFF;
4444

4445
	if (!quiet)
4446
		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4447 4448
			cpu, msr, target_c_local);

4449
	if (!target_c_local)
4450 4451 4452 4453 4454 4455 4456 4457
		goto guess;

	tcc_activation_temp = target_c_local;

	return 0;

guess:
	tcc_activation_temp = TJMAX_DEFAULT;
4458
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4459 4460 4461 4462
		cpu, tcc_activation_temp);

	return 0;
}
4463

4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474
/*
 * decode_feature_control_msr()
 * Print MSR_IA32_FEATURE_CONTROL from base_cpu with its lock state and
 * whether bit 18 (shown as "SGX") is set.  Silent if the MSR read fails.
 */
void decode_feature_control_msr(void)
{
	unsigned long long feature_control;

	if (get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &feature_control))
		return;

	fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
		base_cpu, feature_control,
		feature_control & FEATURE_CONTROL_LOCKED ? "" : "UN-",
		feature_control & (1 << 18) ? "SGX" : "");
}

4475 4476 4477 4478
void decode_misc_enable_msr(void)
{
	unsigned long long msr;

4479 4480 4481
	if (!genuine_intel)
		return;

4482
	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4483
		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4484
			base_cpu, msr,
4485 4486
			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4487
			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4488 4489
			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4490 4491
}

4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506
/*
 * decode_misc_feature_control()
 * Print MSR_MISC_FEATURE_CONTROL from base_cpu and the state of the four
 * prefetchers it controls.  Each of bits 0..3 is a "disable" bit, so a set
 * bit is reported with a "No-" prefix.  Does nothing unless
 * has_misc_feature_control is set; silent if the MSR read fails.
 */
void decode_misc_feature_control(void)
{
	unsigned long long msr;

	if (!has_misc_feature_control)
		return;

	/* one bit per prefetcher: bit 0, 1, 2, 3 in the order printed */
	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
		fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
			base_cpu, msr,
			msr & (1 << 0) ? "No-" : "",
			msr & (1 << 1) ? "No-" : "",
			msr & (1 << 2) ? "No-" : "",
			msr & (1 << 3) ? "No-" : "");
}
4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520
/*
 * Decode MSR_MISC_PWR_MGMT
 *
 * Decode the bits according to the Nehalem documentation
 * bit[0] seems to continue to have same meaning going forward
 * bit[1] less so...
 */
void decode_misc_pwr_mgmt_msr(void)
{
	unsigned long long msr;

	if (!do_nhm_platform_info)
		return;

4521 4522 4523
	if (no_MSR_MISC_PWR_MGMT)
		return;

4524
	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4525
		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4526 4527
			base_cpu, msr,
			msr & (1 << 0) ? "DIS" : "EN",
4528 4529
			msr & (1 << 1) ? "EN" : "DIS",
			msr & (1 << 8) ? "EN" : "DIS");
4530
}
4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548
/*
 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
 *
 * These MSRs are present on Silvermont processors,
 * Intel Atom processor E3000 series (Baytrail), and friends.
 *
 * Each MSR is read on base_cpu and printed with the state of bit 0;
 * an MSR that cannot be read is skipped.
 */
void decode_c6_demotion_policy_msr(void)
{
	unsigned long long policy;
	const char *state;

	if (get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &policy) == 0) {
		state = policy & (1 << 0) ? "EN" : "DIS";
		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
			base_cpu, policy, state);
	}

	if (get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &policy) == 0) {
		state = policy & (1 << 0) ? "EN" : "DIS";
		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
			base_cpu, policy, state);
	}
}
4549

4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571
/*
 * When models are the same, for the purpose of turbostat, reuse
 */
unsigned int intel_model_duplicates(unsigned int model)
{

	switch(model) {
	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
		return INTEL_FAM6_NEHALEM;

	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
		return INTEL_FAM6_NEHALEM_EX;

	case INTEL_FAM6_XEON_PHI_KNM:
		return INTEL_FAM6_XEON_PHI_KNL;

	case INTEL_FAM6_HASWELL_ULT:
4572
		return INTEL_FAM6_HASWELL;
4573 4574 4575 4576 4577 4578

	case INTEL_FAM6_BROADWELL_X:
	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
		return INTEL_FAM6_BROADWELL_X;

	case INTEL_FAM6_SKYLAKE_MOBILE:
4579
	case INTEL_FAM6_SKYLAKE:
4580
	case INTEL_FAM6_KABYLAKE_MOBILE:
4581
	case INTEL_FAM6_KABYLAKE:
4582
		return INTEL_FAM6_SKYLAKE_MOBILE;
4583 4584 4585

	case INTEL_FAM6_ICELAKE_MOBILE:
		return INTEL_FAM6_CANNONLAKE_MOBILE;
4586 4587 4588
	}
	return model;
}
4589
void process_cpuid()
L
Len Brown 已提交
4590
{
4591 4592
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4593
	unsigned int has_turbo;
L
Len Brown 已提交
4594 4595 4596

	eax = ebx = ecx = edx = 0;

4597
	__cpuid(0, max_level, ebx, ecx, edx);
L
Len Brown 已提交
4598

4599
	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
L
Len Brown 已提交
4600
		genuine_intel = 1;
4601 4602
	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
		authentic_amd = 1;
L
Len Brown 已提交
4603

4604
	if (!quiet)
4605
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
L
Len Brown 已提交
4606 4607
			(char *)&ebx, (char *)&edx, (char *)&ecx);

4608
	__cpuid(1, fms, ebx, ecx, edx);
L
Len Brown 已提交
4609 4610 4611
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
4612 4613 4614
	if (family == 0xf)
		family += (fms >> 20) & 0xff;
	if (family >= 6)
L
Len Brown 已提交
4615
		model += ((fms >> 16) & 0xf) << 4;
4616 4617
	ecx_flags = ecx;
	edx_flags = edx;
L
Len Brown 已提交
4618 4619 4620 4621 4622 4623 4624

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
4625
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
L
Len Brown 已提交
4626

4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641
	if (!quiet) {
		fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
			max_level, max_extended_level, family, model, stepping, family, model, stepping);
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
			ecx_flags & (1 << 0) ? "SSE3" : "-",
			ecx_flags & (1 << 3) ? "MONITOR" : "-",
			ecx_flags & (1 << 6) ? "SMX" : "-",
			ecx_flags & (1 << 7) ? "EIST" : "-",
			ecx_flags & (1 << 8) ? "TM2" : "-",
			edx_flags & (1 << 4) ? "TSC" : "-",
			edx_flags & (1 << 5) ? "MSR" : "-",
			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
			edx_flags & (1 << 28) ? "HT" : "-",
			edx_flags & (1 << 29) ? "TM" : "-");
	}
4642 4643
	if (genuine_intel)
		model = intel_model_duplicates(model);
4644 4645 4646 4647

	if (!(edx_flags & (1 << 5)))
		errx(1, "CPUID: no MSR");

4648
	if (max_extended_level >= 0x80000007) {
L
Len Brown 已提交
4649

4650 4651 4652 4653
		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
4654
		__cpuid(0x80000007, eax, ebx, ecx, edx);
4655 4656
		has_invariant_tsc = edx & (1 << 8);
	}
L
Len Brown 已提交
4657 4658 4659 4660 4661 4662

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

4663
	__cpuid(0x6, eax, ebx, ecx, edx);
4664
	has_aperf = ecx & (1 << 0);
4665 4666 4667 4668 4669
	if (has_aperf) {
		BIC_PRESENT(BIC_Avg_MHz);
		BIC_PRESENT(BIC_Busy);
		BIC_PRESENT(BIC_Bzy_MHz);
	}
4670
	do_dts = eax & (1 << 0);
4671 4672
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
4673
	has_turbo = eax & (1 << 1);
4674
	do_ptm = eax & (1 << 6);
4675 4676
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
4677 4678 4679 4680 4681
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
4682 4683
	has_epb = ecx & (1 << 3);

4684
	if (!quiet)
4685
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4686 4687
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
4688
			has_turbo ? "" : "No-",
4689 4690 4691 4692 4693 4694 4695 4696
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-",
			has_hwp_pkg ? "" : "No-",
			has_epb ? "" : "No-");
L
Len Brown 已提交
4697

4698
	if (!quiet)
4699 4700
		decode_misc_enable_msr();

4701

4702
	if (max_level >= 0x7 && !quiet) {
4703
		int has_sgx;
L
Len Brown 已提交
4704

4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715
		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);
		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

4716
	if (max_level >= 0x15) {
4717 4718 4719 4720 4721 4722 4723
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4724
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4725 4726 4727

		if (ebx_tsc != 0) {

4728
			if (!quiet && (ebx != 0))
4729
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4730 4731 4732 4733
					eax_crystal, ebx_tsc, crystal_hz);

			if (crystal_hz == 0)
				switch(model) {
4734
				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4735 4736
					crystal_hz = 24000000;	/* 24.0 MHz */
					break;
4737
				case INTEL_FAM6_ATOM_GOLDMONT_X:	/* DNV */
4738 4739
					crystal_hz = 25000000;	/* 25.0 MHz */
					break;
4740
				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4741
				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4742
					crystal_hz = 19200000;	/* 19.2 MHz */
4743 4744 4745 4746 4747 4748 4749
					break;
				default:
					crystal_hz = 0;
			}

			if (crystal_hz) {
				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4750
				if (!quiet)
4751
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4752 4753 4754 4755
						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
			}
		}
	}
4756 4757 4758 4759 4760 4761 4762 4763
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

4764
		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4765
		if (!quiet)
4766
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4767 4768
				base_mhz, max_mhz, bus_mhz);
	}
4769

4770 4771 4772
	if (has_aperf)
		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);

4773 4774 4775 4776 4777 4778 4779 4780 4781 4782
	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_TSC_MHz);

	if (probe_nhm_msrs(family, model)) {
		do_nhm_platform_info = 1;
		BIC_PRESENT(BIC_CPU_c1);
		BIC_PRESENT(BIC_CPU_c3);
		BIC_PRESENT(BIC_CPU_c6);
		BIC_PRESENT(BIC_SMI);
	}
4783
	do_snb_cstates = has_snb_msrs(family, model);
4784 4785 4786 4787

	if (do_snb_cstates)
		BIC_PRESENT(BIC_CPU_c7);

4788
	do_irtl_snb = has_snb_msrs(family, model);
4789 4790 4791 4792 4793 4794 4795 4796
	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
		BIC_PRESENT(BIC_Pkgpc2);
	if (pkg_cstate_limit >= PCL__3)
		BIC_PRESENT(BIC_Pkgpc3);
	if (pkg_cstate_limit >= PCL__6)
		BIC_PRESENT(BIC_Pkgpc6);
	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
		BIC_PRESENT(BIC_Pkgpc7);
4797
	if (has_slv_msrs(family, model)) {
4798 4799 4800 4801
		BIC_NOT_PRESENT(BIC_Pkgpc2);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_PRESENT(BIC_Pkgpc6);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
4802 4803 4804
		BIC_PRESENT(BIC_Mod_c6);
		use_c1_residency_msr = 1;
	}
4805 4806 4807 4808 4809 4810 4811 4812
	if (is_dnv(family, model)) {
		BIC_PRESENT(BIC_CPU_c1);
		BIC_NOT_PRESENT(BIC_CPU_c3);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
		use_c1_residency_msr = 1;
	}
4813 4814 4815 4816 4817 4818
	if (is_skx(family, model)) {
		BIC_NOT_PRESENT(BIC_CPU_c3);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
4819 4820 4821 4822
	if (is_bdx(family, model)) {
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
4823 4824 4825 4826 4827
	if (has_hsw_msrs(family, model)) {
		BIC_PRESENT(BIC_Pkgpc8);
		BIC_PRESENT(BIC_Pkgpc9);
		BIC_PRESENT(BIC_Pkgpc10);
	}
4828
	do_irtl_hsw = has_hsw_msrs(family, model);
4829 4830 4831 4832 4833 4834
	if (has_skl_msrs(family, model)) {
		BIC_PRESENT(BIC_Totl_c0);
		BIC_PRESENT(BIC_Any_c0);
		BIC_PRESENT(BIC_GFX_c0);
		BIC_PRESENT(BIC_CPUGFX);
	}
4835
	do_slm_cstates = is_slm(family, model);
4836
	do_knl_cstates  = is_knl(family, model);
L
Len Brown 已提交
4837

4838
	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
4839
		BIC_NOT_PRESENT(BIC_CPU_c3);
L
Len Brown 已提交
4840

4841
	if (!quiet)
4842 4843
		decode_misc_pwr_mgmt_msr();

4844
	if (!quiet && has_slv_msrs(family, model))
4845 4846
		decode_c6_demotion_policy_msr();

4847
	rapl_probe(family, model);
4848
	perf_limit_reasons_probe(family, model);
4849
	automatic_cstate_conversion_probe(family, model);
4850

4851
	if (!quiet)
4852
		dump_cstate_pstate_config_info(family, model);
4853

4854 4855
	if (!quiet)
		dump_sysfs_cstate_config();
4856 4857
	if (!quiet)
		dump_sysfs_pstate_config();
4858

4859 4860 4861
	if (has_skl_msrs(family, model))
		calculate_tsc_tweak();

4862 4863
	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
		BIC_PRESENT(BIC_GFX_rc6);
L
Len Brown 已提交
4864

4865 4866
	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
		BIC_PRESENT(BIC_GFXMHz);
L
Len Brown 已提交
4867

4868 4869 4870 4871 4872 4873 4874 4875 4876 4877
	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
		BIC_PRESENT(BIC_CPU_LPI);
	else
		BIC_NOT_PRESENT(BIC_CPU_LPI);

	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
		BIC_PRESENT(BIC_SYS_LPI);
	else
		BIC_NOT_PRESENT(BIC_SYS_LPI);

4878
	if (!quiet)
4879 4880
		decode_misc_feature_control();

4881
	return;
L
Len Brown 已提交
4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900
}

/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/* isdigit() requires a value representable as unsigned char (or EOF) */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}

/*
 * Stub: the argument is ignored and 0 is always returned.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* intentionally unused */

	return 0;
}

4901 4902 4903 4904 4905
void topology_probe()
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
4906
	int max_die_id = 0;
4907 4908 4909
	int max_siblings = 0;

	/* Initialize num_cpus, max_cpu_num */
4910
	set_max_cpu_num();
4911 4912 4913
	topo.num_cpus = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only && topo.num_cpus > 1)
4914
		BIC_PRESENT(BIC_CPU);
4915

4916
	if (debug > 1)
4917
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4918 4919

	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4920 4921
	if (cpus == NULL)
		err(1, "calloc cpus");
4922 4923 4924 4925 4926

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4927 4928
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
4929 4930 4931 4932
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

4933 4934 4935 4936 4937 4938 4939 4940 4941
	/*
	 * Validate that all cpus in cpu_subset are also in cpu_present_set
	 */
	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
				err(1, "cpu%d not present", i);
	}

4942 4943 4944 4945
	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4946 4947
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
4948 4949 4950
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

4951
	for_all_proc_cpus(init_thread_id);
4952 4953 4954 4955 4956 4957 4958 4959 4960

	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
4961
			if (debug > 1)
4962
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
4963 4964 4965
			continue;
		}

4966 4967 4968
		cpus[i].logical_cpu_id = i;

		/* get package information */
4969 4970 4971 4972
		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

4973 4974 4975 4976 4977
		/* get die information */
		cpus[i].die_id = get_die_id(i);
		if (cpus[i].die_id > max_die_id)
			max_die_id = cpus[i].die_id;

4978
		/* get numa node information */
4979 4980 4981
		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
		if (cpus[i].physical_node_id > topo.max_node_num)
			topo.max_node_num = cpus[i].physical_node_id;
4982 4983 4984 4985 4986 4987 4988 4989

		/* get core information */
		cpus[i].physical_core_id = get_core_id(i);
		if (cpus[i].physical_core_id > max_core_id)
			max_core_id = cpus[i].physical_core_id;

		/* get thread information */
		siblings = get_thread_siblings(&cpus[i]);
4990 4991
		if (siblings > max_siblings)
			max_siblings = siblings;
4992
		if (cpus[i].thread_id == 0)
4993
			topo.num_cores++;
4994
	}
4995

4996
	topo.cores_per_node = max_core_id + 1;
4997
	if (debug > 1)
4998
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4999 5000
			max_core_id, topo.cores_per_node);
	if (!summary_only && topo.cores_per_node > 1)
5001
		BIC_PRESENT(BIC_Core);
5002

5003 5004 5005 5006 5007 5008 5009
	topo.num_die = max_die_id + 1;
	if (debug > 1)
		fprintf(outf, "max_die_id %d, sizing for %d die\n",
				max_die_id, topo.num_die);
	if (!summary_only && topo.num_die > 1)
		BIC_PRESENT(BIC_Die);

5010
	topo.num_packages = max_package_id + 1;
5011
	if (debug > 1)
5012
		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
5013
			max_package_id, topo.num_packages);
5014
	if (!summary_only && topo.num_packages > 1)
5015
		BIC_PRESENT(BIC_Package);
5016

5017 5018
	set_node_data();
	if (debug > 1)
5019
		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5020 5021
	if (!summary_only && topo.nodes_per_pkg > 1)
		BIC_PRESENT(BIC_Node);
5022

5023
	topo.threads_per_core = max_siblings;
5024
	if (debug > 1)
5025
		fprintf(outf, "max_siblings %d\n", max_siblings);
5026 5027 5028 5029 5030

	if (debug < 1)
		return;

	for (i = 0; i <= topo.max_cpu_num; ++i) {
5031 5032
		if (cpu_is_not_present(i))
			continue;
5033
		fprintf(outf,
5034 5035
			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
			i, cpus[i].physical_package_id, cpus[i].die_id,
5036 5037 5038 5039 5040 5041
			cpus[i].physical_node_id,
			cpus[i].logical_node_id,
			cpus[i].physical_core_id,
			cpus[i].thread_id);
	}

5042 5043 5044
}

void
5045 5046
allocate_counters(struct thread_data **t, struct core_data **c,
		  struct pkg_data **p)
5047 5048
{
	int i;
5049 5050 5051
	int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
			topo.num_packages;
	int num_threads = topo.threads_per_core * num_cores;
5052

5053
	*t = calloc(num_threads, sizeof(struct thread_data));
5054 5055 5056
	if (*t == NULL)
		goto error;

5057
	for (i = 0; i < num_threads; i++)
5058 5059
		(*t)[i].cpu_id = -1;

5060
	*c = calloc(num_cores, sizeof(struct core_data));
5061 5062 5063
	if (*c == NULL)
		goto error;

5064
	for (i = 0; i < num_cores; i++)
5065 5066
		(*c)[i].core_id = -1;

5067
	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
5068 5069 5070 5071 5072 5073 5074 5075
	if (*p == NULL)
		goto error;

	for (i = 0; i < topo.num_packages; i++)
		(*p)[i].package_id = i;

	return;
error:
5076
	err(1, "calloc counters");
5077 5078 5079 5080 5081 5082 5083
}
/*
 * init_counter()
 *
 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
 */
void init_counter(struct thread_data *thread_base, struct core_data *core_base,
5084
	struct pkg_data *pkg_base, int cpu_id)
5085
{
5086
	int pkg_id = cpus[cpu_id].physical_package_id;
5087
	int node_id = cpus[cpu_id].logical_node_id;
5088 5089
	int core_id = cpus[cpu_id].physical_core_id;
	int thread_id = cpus[cpu_id].thread_id;
5090 5091 5092 5093
	struct thread_data *t;
	struct core_data *c;
	struct pkg_data *p;

5094 5095 5096 5097 5098 5099 5100

	/* Workaround for systems where physical_node_id==-1
	 * and logical_node_id==(-1 - topo.num_cpus)
	 */
	if (node_id < 0)
		node_id = 0;

5101 5102
	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
	c = GET_CORE(core_base, core_id, node_id, pkg_id);
5103
	p = GET_PKG(pkg_base, pkg_id);
5104 5105

	t->cpu_id = cpu_id;
5106
	if (thread_id == 0) {
5107 5108 5109 5110 5111
		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
		if (cpu_is_first_core_in_package(cpu_id))
			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
	}

5112 5113
	c->core_id = core_id;
	p->package_id = pkg_id;
5114 5115 5116 5117 5118
}


int initialize_counters(int cpu_id)
{
5119 5120
	init_counter(EVEN_COUNTERS, cpu_id);
	init_counter(ODD_COUNTERS, cpu_id);
5121 5122 5123 5124 5125
	return 0;
}

void allocate_output_buffer()
{
5126
	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
5127
	outp = output_buffer;
5128 5129
	if (outp == NULL)
		err(-1, "calloc output buffer");
5130
}
5131 5132
void allocate_fd_percpu(void)
{
5133
	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5134 5135 5136
	if (fd_percpu == NULL)
		err(-1, "calloc fd_percpu");
}
5137 5138 5139 5140 5141
void allocate_irq_buffers(void)
{
	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
	if (irq_column_2_cpu == NULL)
		err(-1, "calloc %d", topo.num_cpus);
5142

5143
	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5144
	if (irqs_per_cpu == NULL)
5145
		err(-1, "calloc %d", topo.max_cpu_num + 1);
5146
}
5147 5148 5149
void setup_all_buffers(void)
{
	topology_probe();
5150
	allocate_irq_buffers();
5151
	allocate_fd_percpu();
5152 5153 5154 5155 5156
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
}
5157

5158 5159 5160 5161 5162 5163 5164
void set_base_cpu(void)
{
	base_cpu = sched_getcpu();
	if (base_cpu < 0)
		err(-ENODEV, "No valid cpus found");

	if (debug > 1)
5165
		fprintf(outf, "base_cpu = %d\n", base_cpu);
5166 5167
}

L
Len Brown 已提交
5168 5169
void turbostat_init()
{
5170 5171
	setup_all_buffers();
	set_base_cpu();
L
Len Brown 已提交
5172
	check_dev_msr();
5173
	check_permissions();
5174
	process_cpuid();
L
Len Brown 已提交
5175 5176


5177
	if (!quiet)
5178 5179
		for_all_cpus(print_hwp, ODD_COUNTERS);

5180
	if (!quiet)
5181 5182
		for_all_cpus(print_epb, ODD_COUNTERS);

5183
	if (!quiet)
5184 5185
		for_all_cpus(print_perf_limit, ODD_COUNTERS);

5186
	if (!quiet)
5187 5188 5189 5190
		for_all_cpus(print_rapl, ODD_COUNTERS);

	for_all_cpus(set_temperature_target, ODD_COUNTERS);

5191
	if (!quiet)
5192
		for_all_cpus(print_thermal, ODD_COUNTERS);
5193

5194
	if (!quiet && do_irtl_snb)
5195
		print_irtl();
L
Len Brown 已提交
5196 5197 5198 5199 5200
}

int fork_it(char **argv)
{
	pid_t child_pid;
5201
	int status;
5202

5203
	snapshot_proc_sysfs_files();
5204
	status = for_all_cpus(get_counters, EVEN_COUNTERS);
5205
	first_counter_read = 0;
5206 5207
	if (status)
		exit(status);
5208 5209
	/* clear affinity side-effect of get_counters() */
	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
L
Len Brown 已提交
5210 5211 5212 5213 5214 5215
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
5216
		err(errno, "exec %s", argv[0]);
L
Len Brown 已提交
5217 5218 5219
	} else {

		/* parent */
5220 5221
		if (child_pid == -1)
			err(1, "fork");
L
Len Brown 已提交
5222 5223 5224

		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
5225 5226
		if (waitpid(child_pid, &status, 0) == -1)
			err(status, "waitpid");
5227 5228 5229

		if (WIFEXITED(status))
			status = WEXITSTATUS(status);
L
Len Brown 已提交
5230
	}
5231 5232 5233 5234
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking
	 */
5235
	snapshot_proc_sysfs_files();
5236
	for_all_cpus(get_counters, ODD_COUNTERS);
L
Len Brown 已提交
5237 5238
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
5239 5240 5241 5242 5243 5244
	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
		fprintf(outf, "%s: Counter reset detected\n", progname);
	else {
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
	}
L
Len Brown 已提交
5245

5246 5247 5248
	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);

	flush_output_stderr();
L
Len Brown 已提交
5249

5250
	return status;
L
Len Brown 已提交
5251 5252
}

5253 5254 5255 5256
int get_and_dump_counters(void)
{
	int status;

5257
	snapshot_proc_sysfs_files();
5258 5259 5260 5261 5262 5263 5264 5265
	status = for_all_cpus(get_counters, ODD_COUNTERS);
	if (status)
		return status;

	status = for_all_cpus(dump_counters, ODD_COUNTERS);
	if (status)
		return status;

5266
	flush_output_stdout();
5267 5268 5269 5270

	return status;
}

5271
void print_version() {
5272
	fprintf(outf, "turbostat version 19.03.20"
5273 5274 5275
		" - Len Brown <lenb@kernel.org>\n");
}

5276 5277
int add_counter(unsigned int msr_num, char *path, char *name,
	unsigned int width, enum counter_scope scope,
5278
	enum counter_type type, enum counter_format format, int flags)
5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289
{
	struct msr_counter *msrp;

	msrp = calloc(1, sizeof(struct msr_counter));
	if (msrp == NULL) {
		perror("calloc");
		exit(1);
	}

	msrp->msr_num = msr_num;
	strncpy(msrp->name, name, NAME_BYTES);
5290 5291
	if (path)
		strncpy(msrp->path, path, PATH_BYTES);
5292 5293 5294
	msrp->width = width;
	msrp->type = type;
	msrp->format = format;
5295
	msrp->flags = flags;
5296 5297 5298 5299 5300 5301

	switch (scope) {

	case SCOPE_CPU:
		msrp->next = sys.tp;
		sys.tp = msrp;
5302
		sys.added_thread_counters++;
5303
		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5304 5305 5306 5307
			fprintf(stderr, "exceeded max %d added thread counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
5308 5309 5310 5311 5312
		break;

	case SCOPE_CORE:
		msrp->next = sys.cp;
		sys.cp = msrp;
5313 5314 5315 5316 5317 5318
		sys.added_core_counters++;
		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
			fprintf(stderr, "exceeded max %d added core counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
5319 5320 5321 5322 5323
		break;

	case SCOPE_PACKAGE:
		msrp->next = sys.pp;
		sys.pp = msrp;
5324 5325 5326 5327 5328 5329
		sys.added_package_counters++;
		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
			fprintf(stderr, "exceeded max %d added package counters\n",
				MAX_ADDED_COUNTERS);
			exit(-1);
		}
5330 5331 5332 5333 5334 5335 5336 5337 5338
		break;
	}

	return 0;
}

void parse_add_command(char *add_command)
{
	int msr_num = 0;
5339
	char *path = NULL;
5340
	char name_buffer[NAME_BYTES] = "";
5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354
	int width = 64;
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	while (add_command) {

		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

5355 5356 5357 5358 5359
		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384
		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			width = 64;
		}
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
5385 5386 5387 5388
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		add_command = strchr(add_command, ',');
5413 5414
		if (add_command) {
			*add_command = '\0';
5415
			add_command++;
5416
		}
5417 5418

	}
5419 5420
	if ((msr_num == 0) && (path == NULL)) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5421 5422 5423 5424 5425
		fail++;
	}

	/* generate default column header */
	if (*name_buffer == '\0') {
5426 5427 5428 5429
		if (width == 32)
			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		else
			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5430 5431
	}

5432
	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5433 5434 5435 5436 5437 5438 5439
		fail++;

	if (fail) {
		help();
		exit(1);
	}
}
5440

5441 5442 5443 5444 5445 5446 5447 5448 5449 5450
/*
 * Return 1 if @name exactly matches one of the first deferred_skip_index
 * entries of deferred_skip_names[], otherwise 0.
 */
int is_deferred_skip(char *name)
{
	int idx = 0;

	while (idx < deferred_skip_index) {
		if (strcmp(name, deferred_skip_names[idx]) == 0)
			return 1;
		idx++;
	}

	return 0;
}

5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461
void probe_sysfs(void)
{
	char path[64];
	char name_buf[16];
	FILE *input;
	int state;
	char *sp;

	if (!DO_BIC(BIC_sysfs))
		return;

5462
	for (state = 10; state >= 0; --state) {
5463 5464 5465 5466 5467 5468

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
5469 5470
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);
5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482

		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '%';
		*(sp + 1) = '\0';

		fclose(input);

		sprintf(path, "cpuidle/state%d/time", state);

5483 5484 5485
		if (is_deferred_skip(name_buf))
			continue;

5486 5487 5488 5489
		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
				FORMAT_PERCENT, SYSFS_PERCPU);
	}

5490
	for (state = 10; state >= 0; --state) {
5491 5492 5493 5494 5495 5496

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
			base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
5497 5498
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);
5499 5500 5501 5502 5503 5504 5505 5506 5507
		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		sprintf(path, "cpuidle/state%d/usage", state);

5508 5509 5510
		if (is_deferred_skip(name_buf))
			continue;

5511 5512 5513 5514 5515 5516
		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
				FORMAT_DELTA, SYSFS_PERCPU);
	}

}

5517 5518 5519 5520 5521 5522 5523 5524 5525 5526

/*
 * parse cpuset with following syntax
 * 1,2,4..6,8-10 and set bits in cpu_subset
 */
void parse_cpu_command(char *optarg)
{
	unsigned int start, end;
	char *next;

5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541
	if (!strcmp(optarg, "core")) {
		if (cpu_subset)
			goto error;
		show_core_only++;
		return;
	}
	if (!strcmp(optarg, "package")) {
		if (cpu_subset)
			goto error;
		show_pkg_only++;
		return;
	}
	if (show_core_only || show_pkg_only)
		goto error;

5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598
	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
	if (cpu_subset == NULL)
		err(3, "CPU_ALLOC");
	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);

	CPU_ZERO_S(cpu_subset_size, cpu_subset);

	next = optarg;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			goto error;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			goto error;
		CPU_SET_S(start, cpu_subset_size, cpu_subset);

		if (*next == '\0')
			break;

		if (*next == ',') {
			next += 1;
			continue;
		}

		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				goto error;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)
			goto error;

		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				goto error;
			CPU_SET_S(start, cpu_subset_size, cpu_subset);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0')
			goto error;
	}

	return;

error:
5599 5600
	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
	help();
5601 5602 5603
	exit(-1);
}

5604

L
Len Brown 已提交
5605 5606 5607
void cmdline(int argc, char **argv)
{
	int opt;
5608 5609
	int option_index = 0;
	static struct option long_options[] = {
5610
		{"add",		required_argument,	0, 'a'},
5611
		{"cpu",		required_argument,	0, 'c'},
5612
		{"Dump",	no_argument,		0, 'D'},
5613
		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
5614
		{"enable",	required_argument,	0, 'e'},
5615
		{"interval",	required_argument,	0, 'i'},
5616
		{"num_iterations",	required_argument,	0, 'n'},
5617
		{"help",	no_argument,		0, 'h'},
5618
		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
5619
		{"Joules",	no_argument,		0, 'J'},
5620
		{"list",	no_argument,		0, 'l'},
5621
		{"out",		required_argument,	0, 'o'},
5622
		{"quiet",	no_argument,		0, 'q'},
5623
		{"show",	required_argument,	0, 's'},
5624 5625 5626 5627 5628
		{"Summary",	no_argument,		0, 'S'},
		{"TCC",		required_argument,	0, 'T'},
		{"version",	no_argument,		0, 'v' },
		{0,		0,			0,  0 }
	};
L
Len Brown 已提交
5629 5630 5631

	progname = argv[0];

5632
	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5633
				long_options, &option_index)) != -1) {
L
Len Brown 已提交
5634
		switch (opt) {
5635 5636 5637
		case 'a':
			parse_add_command(optarg);
			break;
5638 5639 5640
		case 'c':
			parse_cpu_command(optarg);
			break;
5641
		case 'D':
5642 5643
			dump_only++;
			break;
5644 5645
		case 'e':
			/* --enable specified counter */
5646
			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5647
			break;
5648 5649
		case 'd':
			debug++;
5650
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
L
Len Brown 已提交
5651
			break;
5652
		case 'H':
5653 5654 5655 5656 5657
			/*
			 * --hide: do not show those specified
			 *  multiple invocations simply clear more bits in enabled mask
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5658
			break;
5659 5660 5661 5662
		case 'h':
		default:
			help();
			exit(1);
L
Len Brown 已提交
5663
		case 'i':
5664 5665 5666 5667
			{
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
5668
					fprintf(outf, "interval %f seconds is too small\n",
5669 5670 5671 5672
						interval);
					exit(2);
				}

5673
				interval_tv.tv_sec = interval_ts.tv_sec = interval;
5674
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5675
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5676
			}
L
Len Brown 已提交
5677
			break;
5678 5679
		case 'J':
			rapl_joules++;
5680
			break;
5681
		case 'l':
5682
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5683 5684 5685
			list_header_only++;
			quiet++;
			break;
5686 5687 5688
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
5689 5690 5691
		case 'q':
			quiet = 1;
			break;
5692 5693 5694 5695 5696 5697 5698 5699 5700
		case 'n':
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n",
					num_iterations);
				exit(2);
			}
			break;
5701
		case 's':
5702 5703 5704 5705 5706 5707 5708 5709 5710 5711
			/*
			 * --show: show only those specified
			 *  The 1st invocation will clear and replace the enabled mask
			 *  subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
5712
			break;
5713 5714
		case 'S':
			summary_only++;
5715 5716 5717 5718
			break;
		case 'T':
			tcc_activation_temp_override = atoi(optarg);
			break;
5719 5720 5721
		case 'v':
			print_version();
			exit(0);
5722
			break;
L
Len Brown 已提交
5723 5724 5725 5726 5727 5728
		}
	}
}

int main(int argc, char **argv)
{
5729
	outf = stderr;
L
Len Brown 已提交
5730 5731
	cmdline(argc, argv);

5732
	if (!quiet)
5733
		print_version();
L
Len Brown 已提交
5734

5735 5736
	probe_sysfs();

L
Len Brown 已提交
5737 5738
	turbostat_init();

5739 5740 5741 5742
	/* dump counters and exit */
	if (dump_only)
		return get_and_dump_counters();

5743 5744 5745 5746 5747 5748 5749
	/* list header and exit */
	if (list_header_only) {
		print_header(",");
		flush_output_stdout();
		return 0;
	}

L
Len Brown 已提交
5750 5751 5752 5753 5754 5755 5756 5757 5758 5759
	/*
	 * if any params left, it must be a command to fork
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}