/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"

#include <sys/prctl.h>
#include <math.h>
#include <locale.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};
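
/*
 * Note: the events above are what back the default "perf stat" output
 * shown in the sample at the top of this file; they are only installed
 * by add_default_attributes() when no -e events were requested.
 */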

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
static struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};
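
/*
 * Note: for PERF_TYPE_HW_CACHE events the config word above is built
 * from three fields: the cache being measured in bits 0-7, the
 * operation (read/write/prefetch) in bits 8-15 and the result
 * (access/miss) in bits 16-23, which is why the initializers shift
 * by 0, 8 and 16.
 */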

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
static struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
static struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};



static struct perf_evlist	*evsel_list;

static bool			system_wide			=  false;
static int			run_idx				=  0;

static int			run_count			=  1;
static bool			no_inherit			= false;
static bool			scale				=  true;
static bool			no_aggr				= false;
static const char		*target_pid;
static const char		*target_tid;
static pid_t			child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			sync_run			=  false;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*cpu_list;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*output_name			= NULL;
static FILE			*output				= NULL;
static int			output_fd;

static volatile int done = 0;

struct stats
{
	double n, mean, M2;
};

struct perf_stat {
	struct stats	  res_stats[3];
};

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->priv = zalloc(sizeof(struct perf_stat));
	return evsel->priv == NULL ? -ENOMEM : 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
}

static void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta*(val - stats->mean);
}

static double avg_stats(struct stats *stats)
{
	return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
static double stddev_stats(struct stats *stats)
{
	double variance, variance_mean;

	if (!stats->n)
		return 0.0;

	variance = stats->M2 / (stats->n - 1);
	variance_mean = variance / stats->n;

	return sqrt(variance_mean);
}
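
/*
 * Worked example (illustrative only): feeding the values 1, 2 and 3
 * through update_stats() leaves n = 3, mean = 2 and M2 = 2, so the
 * sample variance is 2 / (3 - 1) = 1 and the std dev of the mean
 * reported by stddev_stats() is sqrt(1 / 3) ~= 0.58.
 */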

static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;

static int create_perf_stat_counter(struct perf_evsel *evsel,
				    struct perf_evsel *first)
{
	struct perf_event_attr *attr = &evsel->attr;
	struct xyarray *group_fd = NULL;

	if (group && evsel != first)
		group_fd = first->fd;

	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	if (system_wide)
		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
						group, group_fd);
	if (!target_pid && !target_tid && (!group || evsel == first)) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads,
					   group, group_fd);
}
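
/*
 * Note on the function above: when a workload is forked (no -p/-t and
 * not system-wide), the counter is created disabled with
 * enable_on_exec set, so counting only starts once the child calls
 * exec() and perf's own setup is kept out of the measurement.
 */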

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, evsel_list->cpus->nr,
			       evsel_list->threads->nr, scale) < 0)
		return -1;

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			event_name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}

static int run_perf_stat(int argc __used, const char **argv)
{
	unsigned long long t0, t1;
	struct perf_evsel *counter, *first;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(1);
	}

	if (forks) {
		if ((child_pid = fork()) < 0)
			perror("failed to fork");

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}

		if (!target_tid && !target_pid && !system_wide)
			evsel_list->threads->map[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	first = list_entry(evsel_list->entries.next, struct perf_evsel, node);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter, first) < 0) {
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    event_name(counter));
				counter->supported = false;
				continue;
			}

			if (errno == EPERM || errno == EACCES) {
				error("You may not have permission to collect %sstats.\n"
				      "\t Consider tweaking"
				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
				      system_wide ? "system-wide " : "");
			} else {
				error("open_counter returned with %d (%s). "
				      "/bin/dmesg may provide additional information.\n",
				       errno, strerror(errno));
			}
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
			die("Not all events could be opened.\n");
			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__set_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	if (forks) {
		close(go_pipe[1]);
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		while (!done) sleep(1);
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}

static void print_noise_pct(double total, double avg)
{
	double pct = 0.0;

	if (avg)
		pct = 100.0*total/avg;

	if (csv_output)
		fprintf(output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
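
/*
 * The "( +- x.xx% )" column printed with -r/--repeat is the standard
 * deviation of the mean across the repeated runs, expressed as a
 * percentage of the mean itself.
 */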

static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	char cpustr[16] = { '\0', };
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);

	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
}

static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
}

static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
}

static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
}

static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
}

static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}

static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}

static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
}

static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	char cpustr[16] = { '\0', };
	const char *fmt;

	if (csv_output)
		fmt = "%s%.0f%s%s";
	else if (big_num)
		fmt = "%s%'18.0f%s%-25s";
	else
		fmt = "%s%18.0f%s%-25s";

	if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			evsel_list->cpus->map[cpu], csv_sep);
	else
		cpu = 0;

	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[cpu]);

		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;

		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;

	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			event_name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, counter, avg);
	else
		abs_printout(-1, counter, avg);

	print_noise(counter, avg);

	if (csv_output) {
		fputc('\n', output);
		return;
	}

	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}
	fprintf(output, "\n");
}
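
/*
 * The "[xx.xx%]" figure printed above is the time the counter was
 * actually running relative to the time it was enabled; it drops
 * below 100% when the kernel has to time-multiplex more events than
 * there are hardware counters, in which case the counts are scaled
 * estimates.
 */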

/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;
		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s%*s",
				csv_output ? 0 : -4,
				evsel_list->cpus->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
				csv_output ? 0 : -24,
				event_name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			fputc('\n', output);
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, counter, val);
		else
			abs_printout(cpu, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);

			if (run != ena)
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}

static void print_stat(int argc, const char **argv)
{
	struct perf_evsel *counter;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (!target_pid && !target_tid) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target_pid)
			fprintf(output, "process id \'%s", target_pid);
		else
			fprintf(output, "thread id \'%s", target_tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter);
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter);
	}

	if (!csv_output) {
		if (!null_run)
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
			fprintf(output, "                                        ");
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
		}
		fprintf(output, "\n\n");
	}
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if (child_pid == -1)
		done = 1;

	signr = signo;
}

static void sig_atexit(void)
{
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static const char * const stat_usage[] = {
	"perf stat [<options>] [<command>]",
	NULL
};

static int stat__set_big_num(const struct option *opt __used,
			     const char *s __used, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static bool append_file;

static const struct option options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target_pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target_tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale,
		    "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
		    "disable CPU count aggregation"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_END()
};

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs);
}

int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	const char *mode;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		usage_with_options(stat_usage, options);
	}
	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			exit(-1);
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd != 2) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && !target_pid && !target_tid)
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !system_wide) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	if (add_default_attributes())
		goto out;

	if (target_pid)
		target_tid = target_pid;

	evsel_list->threads = thread_map__new_str(target_pid,
						  target_tid, UINT_MAX);
	if (evsel_list->threads == NULL) {
		pr_err("Problems finding threads of monitor\n");
		usage_with_options(stat_usage, options);
	}

	if (system_wide)
		evsel_list->cpus = cpu_map__new(cpu_list);
	else
		evsel_list->cpus = cpu_map__dummy_new();

	if (evsel_list->cpus == NULL) {
		perror("failed to parse CPUs map");
		usage_with_options(stat_usage, options);
		return -1;
	}

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
			goto out_free_fd;
	}

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		if (sync_run)
			sync();

		status = run_perf_stat(argc, argv);
	}

	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node)
		perf_evsel__free_stat_priv(pos);
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}