builtin-stat.c 39.6 KB
Newer Older
1
/*
2 3 4 5 6 7
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:
8

9
   $ perf stat ./hackbench 10
10

11
  Time: 0.118
12

13
  Performance counter stats for './hackbench 10':
14

15 16 17 18 19 20 21 22 23 24 25 26 27
       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed
28

29
 *
30
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31 32 33 34 35 36 37 38
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
39
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40 41
 *
 * Released under the GPL v2. (and only v2, not any later version)
42 43
 */

44
#include "perf.h"
45
#include "builtin.h"
46
#include "util/util.h"
47 48
#include "util/parse-options.h"
#include "util/parse-events.h"
49
#include "util/event.h"
50
#include "util/evlist.h"
51
#include "util/evsel.h"
52
#include "util/debug.h"
53
#include "util/color.h"
54
#include "util/stat.h"
55
#include "util/header.h"
56
#include "util/cpumap.h"
57
#include "util/thread.h"
58
#include "util/thread_map.h"
59

60
#include <stdlib.h>
61
#include <sys/prctl.h>
62
#include <locale.h>
63

S
Stephane Eranian 已提交
64
#define DEFAULT_SEPARATOR	" "
65 66
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
S
Stephane Eranian 已提交
67

68 69 70
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
71
static void print_aggr(char *prefix);
72

73
static struct perf_evlist	*evsel_list;
74

75 76 77
static struct perf_target	target = {
	.uid	= UINT_MAX,
};
78

79 80 81 82 83 84
enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
};

85
static int			run_count			=  1;
86
static bool			no_inherit			= false;
87
static bool			scale				=  true;
88
static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
89
static pid_t			child_pid			= -1;
90
static bool			null_run			=  false;
91
static int			detailed_run			=  0;
92
static bool			big_num				=  true;
S
Stephane Eranian 已提交
93 94 95
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
96
static bool			group				= false;
97
static FILE			*output				= NULL;
98 99 100
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
101
static unsigned int		interval			= 0;
102
static bool			forever				= false;
103
static struct timespec		ref_time;
104 105
static struct cpu_map		*aggr_map;
static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
106

107 108
static volatile int done = 0;

109 110 111 112
struct perf_stat {
	struct stats	  res_stats[3];
};

113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
/*
 * Compute r = a - b for two timespecs, borrowing a second's worth of
 * nanoseconds when needed.  Assumes a >= b.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	long nsec_delta = a->tv_nsec - b->tv_nsec;

	r->tv_sec = a->tv_sec - b->tv_sec;
	if (nsec_delta < 0) {
		/* borrow one second from the seconds field */
		r->tv_sec--;
		r->tv_nsec = nsec_delta + 1000000000L;
	} else {
		r->tv_nsec = nsec_delta;
	}
}

/*
 * CPUs to read this event on: the event's own cpu list wins unless the
 * user pinned the session to an explicit cpu list (-C) on the command line.
 */
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
	if (evsel->cpus && !target.cpu_list)
		return evsel->cpus;

	return evsel_list->cpus;
}

static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
	return perf_evsel__cpus(evsel)->nr;
}

135 136 137 138 139
/* Zero the per-event run-to-run statistics stored in evsel->priv. */
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	struct perf_stat *ps = evsel->priv;

	memset(ps, 0, sizeof(*ps));
}

140
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
141
{
142
	evsel->priv = zalloc(sizeof(struct perf_stat));
143 144 145 146 147 148 149 150 151
	return evsel->priv == NULL ? -ENOMEM : 0;
}

/* Release the per-event stat bookkeeping and clear the pointer. */
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	struct perf_stat *ps = evsel->priv;

	evsel->priv = NULL;
	free(ps);
}

152
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
Y
Yan, Zheng 已提交
153
{
154 155 156 157 158 159 160 161 162 163 164 165 166
	void *addr;
	size_t sz;

	sz = sizeof(*evsel->counts) +
	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));

	addr = zalloc(sz);
	if (!addr)
		return -ENOMEM;

	evsel->prev_raw_counts =  addr;

	return 0;
Y
Yan, Zheng 已提交
167 168
}

169
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
Y
Yan, Zheng 已提交
170
{
171 172
	free(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
Y
Yan, Zheng 已提交
173 174
}

175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
/* Tear down every per-event stat allocation made by perf_evlist__alloc_stats(). */
static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		/* The three allocations are independent; free them all. */
		perf_evsel__free_prev_raw_counts(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_stat_priv(evsel);
	}
}

/*
 * Allocate per-event stat storage for every event in the list; the raw
 * interval snapshot is only allocated when @alloc_raw is set.  On any
 * failure all partially-made allocations are released and -1 is returned.
 */
static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (perf_evsel__alloc_stat_priv(evsel) < 0)
			goto out_free;
		if (perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0)
			goto out_free;
		if (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}

204 205 206 207 208 209 210 211 212 213 214 215
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;
216

217
/*
 * Reset all measurement state between runs: the per-event counts and
 * noise stats, plus every file-scope "shadow" stat used to derive
 * ratios (GHz, insns per cycle, miss rates, ...) and the wall clock.
 */
static void perf_stat__reset_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
	}

	/* Zero every shadow-stat array used by abs_printout()/nsec_printout(). */
	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

240
/*
 * Configure the event attribute for counting and open it, either per-cpu
 * (when a cpu target was given) or per-thread.  Returns the result of the
 * perf_evsel__open_* call; errno is meaningful on failure.
 */
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	/* Ask the kernel for enabled/running times so we can scale counts. */
	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	if (perf_target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	/*
	 * When we fork the workload ourselves (no task/cpu target), start
	 * the group leader disabled and let exec() of the workload enable
	 * it, so the counters don't measure perf's own setup.
	 */
	if (!perf_target__has_task(&target) &&
	    perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

262 263 264
/*
 * Does the counter have nsecs as a unit?
 */
265
static inline int nsec_counter(struct perf_evsel *evsel)
266
{
267 268
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
269 270 271 272 273
		return 1;

	return 0;
}

I
Ingo Molnar 已提交
274 275 276 277 278 279 280 281 282 283 284
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 *
 * Note: only slot [0] of each per-cpu stats array is updated here —
 * the shadow stats are kept aggregated, not per cpu.
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

305
/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 *
 * count[0..2] are the aggregated value / time-enabled / time-running
 * triple; each is folded into the per-event noise stats so -r runs can
 * report a stddev.  Returns 0 on success, -1 on read failure.
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
			       thread_map__nr(evsel_list->threads), scale) < 0)
		return -1;

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 *
 * Each CPU's value is read individually (thread index 0) and fed into
 * the shadow stats.  Returns 0 on success, -1 on the first read failure.
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}

356 357 358 359 360 361 362 363
static void print_interval(void)
{
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

364
	if (aggr_mode == AGGR_GLOBAL) {
365 366 367
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
368
			read_counter_aggr(counter);
369
		}
370
	} else	{
371 372 373
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
374
			read_counter(counter);
375 376
		}
	}
377

378 379 380 381 382
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
383 384
		switch (aggr_mode) {
		case AGGR_SOCKET:
385
			fprintf(output, "#           time socket cpus             counts events\n");
386 387
			break;
		case AGGR_NONE:
388
			fprintf(output, "#           time CPU                 counts events\n");
389 390 391
			break;
		case AGGR_GLOBAL:
		default:
392
			fprintf(output, "#           time             counts events\n");
393
		}
394 395 396 397 398
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;

399 400 401 402 403
	switch (aggr_mode) {
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_NONE:
404 405
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, prefix);
406 407 408
		break;
	case AGGR_GLOBAL:
	default:
409 410 411 412 413
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, prefix);
	}
}

414
/*
 * Core of one measured run: fork the workload (if any), open all
 * counters, run/wait (printing interval snapshots when -I is set),
 * then read the final counts and close the fds.
 *
 * Returns -1 on setup failure, otherwise the workload's exit status
 * (0 when no workload was forked).
 */
static int __run_perf_stat(int argc, const char **argv)
{
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	int status = 0;
	const bool forks = (argc > 0);

	/* Interval is in milliseconds; default poll period is one second. */
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv,
						  false, false) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				/* Unsupported events are skipped, not fatal. */
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			/* Don't leave a prepared-but-unstarted workload behind. */
			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__apply_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);

		/* Poll the child and print a snapshot every interval. */
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		/* No workload: run until interrupted (done set by signal). */
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (aggr_mode == AGGR_GLOBAL) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
	}

	return WEXITSTATUS(status);
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/*
 * Wrap one measured run with the optional user hooks: --pre command,
 * sync before the run (--sync), and --post command.  A non-zero result
 * from any stage short-circuits the rest and is returned.
 */
static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
	int ret = 0;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	/* Flush dirty pages so they don't perturb the measurement. */
	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd)
		ret = system(post_cmd);

	return ret;
}

548 549
static void print_noise_pct(double total, double avg)
{
550
	double pct = rel_stddev_stats(total, avg);
551

552
	if (csv_output)
553
		fprintf(output, "%s%.2f%%", csv_sep, pct);
554
	else if (pct)
555
		fprintf(output, "  ( +-%6.2f%% )", pct);
556 557
}

558
static void print_noise(struct perf_evsel *evsel, double avg)
559
{
560 561
	struct perf_stat *ps;

562 563 564
	if (run_count == 1)
		return;

565
	ps = evsel->priv;
566
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
567 568
}

569
/*
 * Print the per-line aggregation label: "S<socket> <ncpus>" for socket
 * mode, "CPU<n>" for per-cpu mode, nothing for global aggregation.
 * In socket mode @cpu carries the socket id and @nr the cpu count.
 */
static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
{
	switch (aggr_mode) {
	case AGGR_SOCKET:
		fprintf(output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			cpu,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
			break;
	case AGGR_NONE:
		/* cpu here is an index into the event's cpu map. */
		fprintf(output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
		break;
	case AGGR_GLOBAL:
	default:
		break;
	}
}

/*
 * Print a nanosecond-unit counter (cpu-clock/task-clock) as milliseconds,
 * plus a "CPUs utilized" annotation for task-clock in human-readable mode.
 */
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	/* No derived-ratio annotations in CSV or interval mode. */
	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}

614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

/*
 * Map a percentage ratio to a highlight color using per-class
 * red/magenta/yellow thresholds; anything at or below the lowest
 * threshold stays uncolored.
 */
static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};

	if (ratio > grc_table[type][0])
		return PERF_COLOR_RED;
	if (ratio > grc_table[type][1])
		return PERF_COLOR_MAGENTA;
	if (ratio > grc_table[type][2])
		return PERF_COLOR_YELLOW;

	return PERF_COLOR_NORMAL;
}

641 642 643
/* Annotate stalled-cycles-frontend with its share of all cycles. */
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double stall_pct = 0.0;
	double cycles = avg_stats(&runtime_cycles_stats[cpu]);

	if (cycles)
		stall_pct = avg / cycles * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_STALLED_CYCLES_FE, stall_pct),
		      "%6.2f%%", stall_pct);
	fprintf(output, " frontend cycles idle   ");
}

660 661 662
/* Annotate stalled-cycles-backend with its share of all cycles. */
static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double stall_pct = 0.0;
	double cycles = avg_stats(&runtime_cycles_stats[cpu]);

	if (cycles)
		stall_pct = avg / cycles * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_STALLED_CYCLES_BE, stall_pct),
		      "%6.2f%%", stall_pct);
	fprintf(output, " backend  cycles idle   ");
}

679 680 681
/* Annotate branch-misses with its percentage of all branches. */
static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double miss_pct = 0.0;
	double all_branches = avg_stats(&runtime_branches_stats[cpu]);

	if (all_branches)
		miss_pct = avg / all_branches * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all branches        ");
}

698 699 700
/* Annotate L1-dcache misses with their percentage of all L1-dcache accesses. */
static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double miss_pct = 0.0;
	double all_accesses = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (all_accesses)
		miss_pct = avg / all_accesses * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all L1-dcache hits  ");
}

717 718 719
/* Annotate L1-icache misses with their percentage of all L1-icache accesses. */
static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double miss_pct = 0.0;
	double all_accesses = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (all_accesses)
		miss_pct = avg / all_accesses * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all L1-icache hits  ");
}

736 737 738
/* Annotate dTLB misses with their percentage of all dTLB accesses. */
static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double miss_pct = 0.0;
	double all_accesses = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (all_accesses)
		miss_pct = avg / all_accesses * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all dTLB cache hits ");
}

755 756 757
/* Annotate iTLB misses with their percentage of all iTLB accesses. */
static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double miss_pct = 0.0;
	double all_accesses = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (all_accesses)
		miss_pct = avg / all_accesses * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all iTLB cache hits ");
}

774 775 776
/* Annotate last-level-cache misses with their percentage of all LL accesses. */
static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double miss_pct = 0.0;
	double all_accesses = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (all_accesses)
		miss_pct = avg / all_accesses * 100.0;

	fprintf(output, " #  ");
	color_fprintf(output, get_ratio_color(GRC_CACHE_MISSES, miss_pct),
		      "%6.2f%%", miss_pct);
	fprintf(output, " of all LL-cache hits   ");
}

793
/*
 * Print an absolute (non-nanosecond) counter value, then — in
 * human-readable non-interval mode — a derived annotation chosen from
 * the shadow stats: insns/cycle, miss ratios, GHz, or rate per second.
 */
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	const char *fmt;

	if (csv_output)
		fmt = "%.0f%s%s";
	else if (big_num)
		fmt = "%'18.0f%s%-25s";	/* ' flag: thousands grouping */
	else
		fmt = "%18.0f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	/* Global aggregation keeps all shadow stats in slot 0. */
	if (aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	/* No derived-ratio annotations in CSV or interval mode. */
	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[cpu]);
		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		/* Use the larger of frontend/backend stall counts. */
		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		/* cycles per nanosecond == GHz */
		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}

909
static void print_aggr(char *prefix)
910 911
{
	struct perf_evsel *counter;
912
	int cpu, s, s2, id, nr;
913 914
	u64 ena, run, val;

915
	if (!(aggr_map || aggr_get_id))
916 917
		return;

918 919
	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
920 921 922 923
		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
924 925
				s2 = aggr_get_id(evsel_list->cpus, cpu);
				if (s2 != id)
926 927 928 929 930 931 932 933 934 935
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
936 937 938
				aggr_printout(counter, cpu, nr);

				fprintf(output, "%*s%s%*s",
939 940 941 942 943
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));
944

945 946 947 948 949 950 951 952 953
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
954
				nsec_printout(id, nr, counter, val);
955
			else
956
				abs_printout(id, nr, counter, val);
957 958 959 960 961 962 963 964 965 966 967 968 969

			if (!csv_output) {
				print_noise(counter, 1.0);

				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}

970 971
/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 *
 * A scaled value of -1 means the counter never ran; print the
 * <not counted>/<not supported> placeholder instead of a value.
 * @prefix is the interval-mode time prefix (may be NULL).
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;

	if (prefix)
		fprintf(output, "%s", prefix);

	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, 0, counter, avg);
	else
		abs_printout(-1, 0, counter, avg);

	print_noise(counter, avg);

	if (csv_output) {
		fputc('\n', output);
		return;
	}

	/* scaled > 0: counter was multiplexed; show its on-cpu share. */
	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}
	fprintf(output, "\n");
}

1021 1022 1023 1024
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 *
 * One output line per CPU of the event; CPUs where the counter never
 * ran get the <not counted>/<not supported> placeholder.  @prefix is
 * the interval-mode time prefix (may be NULL).
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;

		if (prefix)
			fprintf(output, "%s", prefix);

		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s%*s",
				csv_output ? 0 : -4,
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
				csv_output ? 0 : -24,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			fputc('\n', output);
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, 0, counter, val);
		else
			abs_printout(cpu, 0, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);

			/* run < ena: counter was multiplexed; show its share. */
			if (run != ena)
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}

1072 1073
static void print_stat(int argc, const char **argv)
{
1074 1075
	struct perf_evsel *counter;
	int i;
1076

1077 1078
	fflush(stdout);

S
Stephane Eranian 已提交
1079
	if (!csv_output) {
1080 1081
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
1082
		if (!perf_target__has_task(&target)) {
1083
			fprintf(output, "\'%s", argv[0]);
S
Stephane Eranian 已提交
1084
			for (i = 1; i < argc; i++)
1085
				fprintf(output, " %s", argv[i]);
1086 1087
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
S
Stephane Eranian 已提交
1088
		else
1089
			fprintf(output, "thread id \'%s", target.tid);
I
Ingo Molnar 已提交
1090

1091
		fprintf(output, "\'");
S
Stephane Eranian 已提交
1092
		if (run_count > 1)
1093 1094
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
S
Stephane Eranian 已提交
1095
	}
1096

1097 1098 1099 1100 1101
	switch (aggr_mode) {
	case AGGR_SOCKET:
		print_aggr(NULL);
		break;
	case AGGR_GLOBAL:
1102
		list_for_each_entry(counter, &evsel_list->entries, node)
1103
			print_counter_aggr(counter, NULL);
1104 1105 1106 1107 1108 1109 1110
		break;
	case AGGR_NONE:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, NULL);
		break;
	default:
		break;
1111
	}
1112

S
Stephane Eranian 已提交
1113
	if (!csv_output) {
1114
		if (!null_run)
1115 1116
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
S
Stephane Eranian 已提交
1117 1118
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
1119
			fprintf(output, "                                        ");
1120 1121
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
S
Stephane Eranian 已提交
1122
		}
1123
		fprintf(output, "\n\n");
I
Ingo Molnar 已提交
1124
	}
1125 1126
}

1127 1128
static volatile int signr = -1;

1129
static void skip_signal(int signo)
1130
{
1131
	if ((child_pid == -1) || interval)
1132 1133
		done = 1;

1134 1135 1136 1137 1138
	signr = signo;
}

static void sig_atexit(void)
{
1139 1140 1141
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

1142 1143 1144 1145 1146
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
1147 1148
}

1149 1150
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
S
Stephane Eranian 已提交
1151 1152 1153 1154 1155
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
static int perf_stat_init_aggr_mode(void)
{
	switch (aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = cpu_map__get_socket;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	default:
		break;
	}
	return 0;
}


1175 1176 1177 1178 1179 1180
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

1288 1289 1290 1291 1292
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
1293
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1294
			return -1;
1295 1296 1297 1298 1299 1300 1301 1302
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
1303
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1304
		return -1;
1305 1306 1307 1308 1309

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
1310
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1311
		return -1;
1312 1313 1314 1315 1316

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
1317
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1318 1319
}

1320
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1321
{
1322
	bool append_file = false;
1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
	int output_fd = 0;
	const char *output_name	= NULL;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
1345
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
1357 1358
	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
1359 1360 1361 1362 1363 1364 1365 1366
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
1367 1368 1369 1370
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
1371 1372
	OPT_UINTEGER('I', "interval-print", &interval,
		    "print counts at regular interval in ms (>= 100)"),
1373 1374
	OPT_SET_UINT(0, "aggr-socket", &aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
1375 1376 1377 1378 1379 1380 1381
	OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -ENOMEM, run_idx;
1382
	const char *mode;
1383

1384 1385
	setlocale(LC_ALL, "");

1386
	evsel_list = perf_evlist__new();
1387 1388 1389
	if (evsel_list == NULL)
		return -ENOMEM;

1390 1391
	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);
S
Stephane Eranian 已提交
1392

1393 1394 1395 1396
	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

1397 1398 1399 1400
	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		usage_with_options(stat_usage, options);
	}
1401 1402 1403 1404 1405 1406

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		usage_with_options(stat_usage, options);
	}

1407 1408 1409 1410 1411 1412 1413
	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
1414
			return -1;
1415 1416 1417
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1418
	} else if (output_fd > 0) {
1419 1420 1421 1422 1423 1424
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
1425 1426
	}

1427
	if (csv_sep) {
S
Stephane Eranian 已提交
1428
		csv_output = true;
1429 1430 1431
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
S
Stephane Eranian 已提交
1432 1433 1434 1435 1436 1437
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
J
Jim Cromie 已提交
1438
		/* User explicitly passed -B? */
S
Stephane Eranian 已提交
1439 1440 1441 1442 1443 1444 1445 1446
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

1447
	if (!argc && !perf_target__has_task(&target))
1448
		usage_with_options(stat_usage, options);
1449
	if (run_count < 0) {
1450
		usage_with_options(stat_usage, options);
1451 1452 1453 1454
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}
1455

S
Stephane Eranian 已提交
1456
	/* no_aggr, cgroup are for system-wide only */
1457 1458
	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
	     && !perf_target__has_cpu(&target)) {
S
Stephane Eranian 已提交
1459 1460 1461
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

1462
		usage_with_options(stat_usage, options);
1463
		return -1;
1464 1465
	}

1466 1467
	if (add_default_attributes())
		goto out;
1468

1469
	perf_target__validate(&target);
1470

1471
	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1472
		if (perf_target__has_task(&target))
1473
			pr_err("Problems finding threads of monitor\n");
1474
		if (perf_target__has_cpu(&target))
1475
			perror("failed to parse CPUs map");
1476

1477
		usage_with_options(stat_usage, options);
1478 1479
		return -1;
	}
1480 1481 1482 1483 1484
	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		usage_with_options(stat_usage, options);
		return -1;
	}
1485

1486 1487
	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out_free_maps;
1488

1489 1490 1491
	if (perf_stat_init_aggr_mode())
		goto out;

I
Ingo Molnar 已提交
1492 1493 1494 1495 1496 1497
	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
1498
	atexit(sig_atexit);
1499 1500
	if (!forever)
		signal(SIGINT,  skip_signal);
1501
	signal(SIGCHLD, skip_signal);
I
Ingo Molnar 已提交
1502 1503 1504
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

1505
	status = 0;
1506
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1507
		if (run_count != 1 && verbose)
1508 1509
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);
I
Ingo Molnar 已提交
1510

1511
		status = run_perf_stat(argc, argv);
1512 1513
		if (forever && status != -1) {
			print_stat(argc, argv);
1514
			perf_stat__reset_stats(evsel_list);
1515
		}
1516 1517
	}

1518
	if (!forever && status != -1 && !interval)
1519
		print_stat(argc, argv);
1520 1521 1522

	perf_evlist__free_stats(evsel_list);
out_free_maps:
1523
	perf_evlist__delete_maps(evsel_list);
1524 1525
out:
	perf_evlist__delete(evsel_list);
1526
	return status;
1527
}