stat-shadow.c 15.0 KB
Newer Older
1 2 3 4
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
5
#include "pmu.h"
6 7 8 9 10 11 12 13 14 15 16 17

enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

18 19 20 21 22 23 24
/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
25 26 27 28 29 30 31 32 33 34 35 36 37 38
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
39
static bool have_frontend_stalled;
40 41 42

struct stats walltime_nsecs_stats;

43 44 45 46 47
void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
}

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

void perf_stat__reset_shadow_stats(void)
{
	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0,
			sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
		sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
				    int cpu)
{
	int ctx = evsel_context(counter);

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
102
		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

154
static void print_stalled_cycles_frontend(int cpu,
155
					  struct perf_evsel *evsel, double avg,
156
					  struct perf_stat_output_ctx *out)
157 158 159 160 161 162 163 164 165 166 167 168
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

169 170 171 172 173
	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
174 175
}

176
static void print_stalled_cycles_backend(int cpu,
177
					 struct perf_evsel *evsel, double avg,
178
					 struct perf_stat_output_ctx *out)
179 180 181 182 183 184 185 186 187 188 189 190
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

191
	out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
192 193
}

194
static void print_branch_misses(int cpu,
195
				struct perf_evsel *evsel,
196 197
				double avg,
				struct perf_stat_output_ctx *out)
198 199 200 201 202 203 204 205 206 207 208 209
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_branches_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

210
	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
211 212
}

213
static void print_l1_dcache_misses(int cpu,
214
				   struct perf_evsel *evsel,
215 216
				   double avg,
				   struct perf_stat_output_ctx *out)
217 218 219 220 221 222 223 224 225 226 227 228
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

229
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
230 231
}

232
static void print_l1_icache_misses(int cpu,
233
				   struct perf_evsel *evsel,
234 235
				   double avg,
				   struct perf_stat_output_ctx *out)
236 237 238 239 240 241 242 243 244 245 246
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
247
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
248 249
}

250
static void print_dtlb_cache_misses(int cpu,
251
				    struct perf_evsel *evsel,
252 253
				    double avg,
				    struct perf_stat_output_ctx *out)
254 255 256 257 258 259 260 261 262 263 264
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
265
	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
266 267
}

268
static void print_itlb_cache_misses(int cpu,
269
				    struct perf_evsel *evsel,
270 271
				    double avg,
				    struct perf_stat_output_ctx *out)
272 273 274 275 276 277 278 279 280 281 282
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
283
	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
284 285
}

286
static void print_ll_cache_misses(int cpu,
287
				  struct perf_evsel *evsel,
288 289
				  double avg,
				  struct perf_stat_output_ctx *out)
290 291 292 293 294 295 296 297 298 299 300
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
301
	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
302 303
}

304 305 306
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out)
307
{
308 309
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
310 311 312 313 314 315 316
	double total, ratio = 0.0, total2;
	int ctx = evsel_context(evsel);

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total) {
			ratio = avg / total;
317 318
			print_metric(ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
319
		} else {
320
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
321 322 323 324 325
		}
		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
326
			out->new_line(ctxp);
327
			ratio = total / avg;
328 329 330
			print_metric(ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
331
		} else if (have_frontend_stalled) {
332 333
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
334
		}
335 336 337 338 339
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_branches_stats[ctx][cpu].n != 0)
			print_branch_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
340 341 342 343
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
344 345 346 347 348
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
349 350 351 352
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353 354 355 356 357
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
			print_l1_icache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
358 359 360 361
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
362 363 364 365 366
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
367 368 369 370
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
371 372 373 374 375
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
376 377 378 379
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
380 381 382 383 384 385
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
			print_ll_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
386 387 388 389 390
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

391 392 393 394 395
		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
396
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
397
		print_stalled_cycles_frontend(cpu, evsel, avg, out);
398
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
399
		print_stalled_cycles_backend(cpu, evsel, avg, out);
400 401 402 403 404
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
405
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
406
		} else {
407
			print_metric(ctxp, NULL, NULL, "Ghz", 0);
408 409 410 411
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total)
412 413 414 415 416 417
			print_metric(ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
418 419 420 421 422 423
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
424
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
425
				100.0 * ((total2-avg) / total));
426 427 428
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
429 430
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

431
		if (avg)
432 433
			ratio = total / avg;

434 435 436 437 438 439 440
		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
441 442
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

443
		if (avg)
444 445
			ratio = total / avg;

446
		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
447 448
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
449 450
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / ratio);
451
		else
452
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
453 454
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';
455
		char unit_buf[10];
456 457 458 459 460 461 462 463 464

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
465 466
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
467
	} else {
468
		print_metric(ctxp, NULL, NULL, NULL, 0);
469 470
	}
}