/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"

#include <sys/prctl.h>
#include <locale.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

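/*
 * Default event set: the software task-clock/context-switch/migration/
 * page-fault counters plus the hardware counters shown in the sample
 * output above.
 */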
static struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
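/*
 * HW_CACHE event configs are built from three fields, encoded as
 * (cache id) | (cache op << 8) | (cache result << 16), which is the
 * layout the shifts below spell out.
 */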
static struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
static struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
static struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};



static struct perf_evlist	*evsel_list;

static struct perf_target	target = {
	.uid	= UINT_MAX,
};

static int			run_idx				=  0;
static int			run_count			=  1;
static bool			no_inherit			= false;
static bool			scale				=  true;
static bool			no_aggr				= false;
static pid_t			child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			sync_run			=  false;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*output_name			= NULL;
static FILE			*output				= NULL;
static int			output_fd;

static volatile int done = 0;

struct perf_stat {
	struct stats	  res_stats[3];
};

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->priv = zalloc(sizeof(struct perf_stat));
	return evsel->priv == NULL ? -ENOMEM : 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
}

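/*
 * Per-CPU "shadow" stats: reference counts used to derive the ratios
 * (insns per cycle, miss rates, GHz, ...) printed next to the raw counts.
 */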
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;

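/*
 * Open the counter for @evsel.  Kernels that do not know the
 * exclude_guest/exclude_host attribute bits reject the open with
 * EINVAL; in that case we retry once with both bits cleared.
 */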
static int create_perf_stat_counter(struct perf_evsel *evsel,
				    struct perf_evsel *first)
{
	struct perf_event_attr *attr = &evsel->attr;
	bool exclude_guest_missing = false;
	int ret;

	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

retry:
	if (exclude_guest_missing)
		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;

	if (perf_target__has_cpu(&target)) {
		ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus);
		if (ret)
			goto check_ret;
		return 0;
	}

	if (!perf_target__has_task(&target) && (!group || evsel == first)) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}

	ret = perf_evsel__open_per_thread(evsel, evsel_list->threads);
	if (!ret)
		return 0;
	/* fall through */
check_ret:
	if (ret && errno == EINVAL) {
		if (!exclude_guest_missing &&
		    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
			pr_debug("Old kernel, cannot exclude "
				 "guest or host samples.\n");
			exclude_guest_missing = true;
			goto retry;
		}
	}
	return ret;
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, evsel_list->cpus->nr,
			       evsel_list->threads->nr, scale) < 0)
		return -1;

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}

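/*
 * Run the (optional) workload with counters enabled.  When forking a
 * command, two pipes synchronize parent and child: the child signals
 * readiness on child_ready_pipe, then blocks on go_pipe until the
 * parent has opened all counters and closes go_pipe to start the
 * measured exec.
 */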
static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
	unsigned long long t0, t1;
	struct perf_evsel *counter, *first;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		return -1;
	}

	if (forks) {
		if ((child_pid = fork()) < 0)
			perror("failed to fork");

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}

		if (perf_target__none(&target))
			evsel_list->threads->map[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	first = perf_evlist__first(evsel_list);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter, first) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			if (errno == EPERM || errno == EACCES) {
				error("You may not have permission to collect %sstats.\n"
				      "\t Consider tweaking"
				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
				      target.system_wide ? "system-wide " : "");
			} else {
				error("open_counter returned with %d (%s). "
				      "/bin/dmesg may provide additional information.\n",
				       errno, strerror(errno));
			}
			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			pr_err("Not all events could be opened.\n");
			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__set_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	if (forks) {
		close(go_pipe[1]);
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		while (!done)
			sleep(1);
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	char cpustr[16] = { '\0', };
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);

	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
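	/* per-type thresholds: above [0] print red, above [1] magenta, above [2] yellow */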
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
}

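/*
 * Print the raw count plus, when the matching shadow stats were
 * collected, a derived ratio (insns per cycle, miss rate, GHz, ...).
 */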
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	char cpustr[16] = { '\0', };
	const char *fmt;

	if (csv_output)
		fmt = "%s%.0f%s%s";
	else if (big_num)
		fmt = "%s%'18.0f%s%-25s";
	else
		fmt = "%s%18.0f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);
	else
		cpu = 0;

	fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[cpu]);

		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;

		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;

	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, counter, avg);
	else
		abs_printout(-1, counter, avg);

	print_noise(counter, avg);

	if (csv_output) {
		fputc('\n', output);
		return;
	}

	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}
	fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;
		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s%*s",
				csv_output ? 0 : -4,
				evsel_list->cpus->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
				csv_output ? 0 : -24,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			fputc('\n', output);
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, counter, val);
		else
			abs_printout(cpu, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);

			if (run != ena)
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}

static void print_stat(int argc, const char **argv)
{
	struct perf_evsel *counter;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (!perf_target__has_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter);
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter);
	}

	if (!csv_output) {
		if (!null_run)
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
			fprintf(output, "                                        ");
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
		}
		fprintf(output, "\n\n");
	}
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if (child_pid == -1)
		done = 1;

	signr = signo;
}

static void sig_atexit(void)
{
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static const char * const stat_usage[] = {
	"perf stat [<options>] [<command>]",
	NULL
};

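/* big_num_opt: -1 = neither -B nor --no-big-num given, 1 = -B, 0 = --no-big-num */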
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static bool append_file;

static const struct option options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale,
		    "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
		    "disable CPU count aggregation"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_END()
};

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	const char *mode;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		usage_with_options(stat_usage, options);
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		usage_with_options(stat_usage, options);
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && !perf_target__has_task(&target))
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	if (add_default_attributes())
		goto out;

	perf_target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (perf_target__has_task(&target))
			pr_err("Problems finding threads of monitor\n");
		if (perf_target__has_cpu(&target))
			perror("failed to parse CPUs map");

		usage_with_options(stat_usage, options);
		return -1;
	}

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
			goto out_free_fd;
	}

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		if (sync_run)
			sync();

		status = run_perf_stat(argc, argv);
	}

	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node)
		perf_evsel__free_stat_priv(pos);
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}