builtin-stat.c 33.6 KB
Newer Older
1
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

44
#include "perf.h"
45
#include "builtin.h"
46
#include "util/util.h"
47 48
#include "util/parse-options.h"
#include "util/parse-events.h"
49
#include "util/event.h"
50
#include "util/evlist.h"
51
#include "util/evsel.h"
52
#include "util/debug.h"
53
#include "util/color.h"
54
#include "util/header.h"
55
#include "util/cpumap.h"
56
#include "util/thread.h"
57
#include "util/thread_map.h"
58 59

#include <sys/prctl.h>
60
#include <math.h>
61
#include <locale.h>
62

S
Stephane Eranian 已提交
63
#define DEFAULT_SEPARATOR	" "
64 65
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
S
Stephane Eranian 已提交
66

67
static struct perf_event_attr default_attrs[] = {
68

69 70 71 72
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
73

74
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
75 76
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
77 78 79
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
80

81
};
82

83
/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
static struct perf_event_attr detailed_attrs[] = {

  /* L1 data cache: read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* L1 data cache: read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  /* Last-level cache: read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* Last-level cache: read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
static struct perf_event_attr very_detailed_attrs[] = {

  /* L1 instruction cache: read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* L1 instruction cache: read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  /* Data TLB: read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* Data TLB: read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  /* Instruction TLB: read accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* Instruction TLB: read misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
static struct perf_event_attr very_very_detailed_attrs[] = {

  /* L1 data cache: prefetch accesses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  /* L1 data cache: prefetch misses */
  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};



176
static struct perf_evlist	*evsel_list;
177

178 179 180
static struct perf_target	target = {
	.uid	= UINT_MAX,
};
181

182
static int			run_idx				=  0;
183
static int			run_count			=  1;
184
static bool			no_inherit			= false;
185
static bool			scale				=  true;
186
static bool			no_aggr				= false;
187
static pid_t			child_pid			= -1;
188
static bool			null_run			=  false;
189
static int			detailed_run			=  0;
I
Ingo Molnar 已提交
190
static bool			sync_run			=  false;
191
static bool			big_num				=  true;
S
Stephane Eranian 已提交
192 193 194
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
195
static bool			group				= false;
196 197
static const char		*output_name			= NULL;
static FILE			*output				= NULL;
198
static int			output_fd;
199

200 201
static volatile int done = 0;

202 203
/*
 * Running accumulator for a series of samples:
 *   n    - number of samples folded in so far
 *   mean - running mean of the samples
 *   M2   - running sum of squared differences from the mean
 */
struct stats {
	double n;
	double mean;
	double M2;
};
206

207 208 209 210
/*
 * Per-event private data stored in evsel->priv: running statistics over
 * the three values read back for a counter.  print_counter_aggr() uses
 * index 0 as the count, index 1 as the enabled time and index 2 as the
 * running time.
 */
struct perf_stat {
	struct stats	  res_stats[3];
};

211
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
212
{
213
	evsel->priv = zalloc(sizeof(struct perf_stat));
214 215 216 217 218 219 220 221 222
	return evsel->priv == NULL ? -ENOMEM : 0;
}

/*
 * Release the private data attached to evsel->priv and clear the pointer
 * so a stale reference is not left behind.
 */
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
}

223 224
/*
 * Fold one more sample into the running statistics: bump the sample
 * count, move the mean towards the sample and accumulate the squared
 * deviation in M2 (incremental mean/variance update).
 */
static void update_stats(struct stats *stats, u64 val)
{
	double old_diff;
	double new_diff;

	stats->n += 1;

	/* Distance from the mean before and after it absorbs the sample. */
	old_diff = val - stats->mean;
	stats->mean += old_diff / stats->n;
	new_diff = val - stats->mean;

	stats->M2 += old_diff * new_diff;
}

233 234
static double avg_stats(struct stats *stats)
{
235
	return stats->mean;
236
}
237

238
/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
static double stddev_stats(struct stats *stats)
{
	double variance, variance_mean;

	/*
	 * The sample variance divides by (n - 1), so it is only defined
	 * for two or more samples.  With a single sample M2 is 0 and the
	 * division would be 0/0 (NaN); report "no spread" instead, as is
	 * already done for the empty case.
	 */
	if (stats->n < 2)
		return 0.0;

	variance = stats->M2 / (stats->n - 1);
	variance_mean = variance / stats->n;

	/* Standard deviation of the mean: s / sqrt(n). */
	return sqrt(variance_mean);
}
266

267 268 269 270 271 272 273 274 275 276 277 278
/*
 * "Shadow" statistics for selected events, kept so the printout code can
 * derive ratios (e.g. misses relative to accesses).  The arrays are sized
 * MAX_NR_CPUS and the print helpers index them by cpu;
 * update_shadow_stats() only ever writes slot 0.
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
/* Wall-clock duration of each run, in nanoseconds. */
static struct stats walltime_nsecs_stats;
279

280 281
static int create_perf_stat_counter(struct perf_evsel *evsel,
				    struct perf_evsel *first)
282
{
283
	struct perf_event_attr *attr = &evsel->attr;
284
	struct xyarray *group_fd = NULL;
285 286
	bool exclude_guest_missing = false;
	int ret;
287 288 289

	if (group && evsel != first)
		group_fd = first->fd;
290

291
	if (scale)
292 293
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
294

295 296
	attr->inherit = !no_inherit;

297 298 299 300
retry:
	if (exclude_guest_missing)
		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;

301
	if (perf_target__has_cpu(&target)) {
302
		ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
303
					       group, group_fd);
304 305 306 307 308
		if (ret)
			goto check_ret;
		return 0;
	}

309
	if (!perf_target__has_task(&target) && (!group || evsel == first)) {
310 311
		attr->disabled = 1;
		attr->enable_on_exec = 1;
312
	}
313

314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
	ret = perf_evsel__open_per_thread(evsel, evsel_list->threads,
					  group, group_fd);
	if (!ret)
		return 0;
	/* fall through */
check_ret:
	if (ret && errno == EINVAL) {
		if (!exclude_guest_missing &&
		    (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
			pr_debug("Old kernel, cannot exclude "
				 "guest or host samples.\n");
			exclude_guest_missing = true;
			goto retry;
		}
	}
	return ret;
330 331
}

332 333 334
/*
 * Does the counter have nsecs as a unit?
 */
335
static inline int nsec_counter(struct perf_evsel *evsel)
336
{
337 338
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
339 340 341 342 343
		return 1;

	return 0;
}

I
Ingo Molnar 已提交
344 345 346 347 348 349 350 351 352 353 354
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	/* Shadow accumulator matching this event, if it has one. */
	struct stats *slot = NULL;

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		slot = runtime_nsecs_stats;
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		slot = runtime_cycles_stats;
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		slot = runtime_stalled_cycles_front_stats;
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		slot = runtime_stalled_cycles_back_stats;
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		slot = runtime_branches_stats;
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		slot = runtime_cacherefs_stats;
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		slot = runtime_l1_dcache_stats;
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		slot = runtime_l1_icache_stats;
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		slot = runtime_ll_cache_stats;
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		slot = runtime_dtlb_cache_stats;
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		slot = runtime_itlb_cache_stats;

	/* Only element 0 of the chosen array is updated here. */
	if (slot != NULL)
		update_stats(slot, count[0]);
}

375
/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
379
static int read_counter_aggr(struct perf_evsel *counter)
380
{
381
	struct perf_stat *ps = counter->priv;
382 383
	u64 *count = counter->counts->aggr.values;
	int i;
384

385 386
	if (__perf_evsel__read(counter, evsel_list->cpus->nr,
			       evsel_list->threads->nr, scale) < 0)
387
		return -1;
388 389

	for (i = 0; i < 3; i++)
390
		update_stats(&ps->res_stats[i], count[i]);
391 392

	if (verbose) {
393
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
394
			event_name(counter), count[0], count[1], count[2]);
395 396
	}

397 398 399
	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
I
Ingo Molnar 已提交
400
	update_shadow_stats(counter, count);
401 402

	return 0;
403 404 405 406 407 408
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
409
static int read_counter(struct perf_evsel *counter)
410
{
411
	u64 *count;
412 413
	int cpu;

414
	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
415 416
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;
417

418
		count = counter->counts->cpu[cpu].values;
419

I
Ingo Molnar 已提交
420
		update_shadow_stats(counter, count);
421
	}
422 423

	return 0;
424 425
}

426
/*
 * Perform one measurement run.
 *
 * When a command is given (argc > 0) it is forked, held back on a pipe
 * until the counters have been opened, then released and waited for.
 * Without a command the function sleeps until 'done' is set.  Afterwards
 * the elapsed wall-clock time is recorded and every counter is read and
 * closed.
 *
 * Returns WEXITSTATUS() of the child's wait status (0 when nothing was
 * forked), or -1 if setting the event filters failed.  Failure to create
 * the pipes or to fork terminates the process.
 */
static int run_perf_stat(int argc __used, const char **argv)
{
	unsigned long long t0, t1;
	struct perf_evsel *counter, *first;
	int status = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = (argc > 0);
	char buf;

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(1);
	}

	if (forks) {
		/*
		 * A failed fork must not be ignored: child_pid would stay
		 * -1, be stored in the thread map below and later be
		 * waited for although no child exists.  Bail out the same
		 * way as for a pipe failure.
		 */
		if ((child_pid = fork()) < 0) {
			perror("failed to fork");
			exit(1);
		}

		if (!child_pid) {
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}

		if (perf_target__none(&target))
			evsel_list->threads->map[0] = child_pid;

		/*
		 * Wait for the child to be ready to exec.
		 */
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
			perror("unable to read pipe");
		close(child_ready_pipe[0]);
	}

	first = list_entry(evsel_list->entries.next, struct perf_evsel, node);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter, first) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    event_name(counter));
				/* Unsupported events are skipped, not fatal. */
				counter->supported = false;
				continue;
			}

			if (errno == EPERM || errno == EACCES) {
				error("You may not have permission to collect %sstats.\n"
				      "\t Consider tweaking"
				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
				      target.system_wide ? "system-wide " : "");
			} else {
				error("open_counter returned with %d (%s). "
				      "/bin/dmesg may provide additional information.\n",
				       errno, strerror(errno));
			}
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
			die("Not all events could be opened.\n");
			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__set_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

	if (forks) {
		/* Closing the write end releases the child blocked in read(). */
		close(go_pipe[1]);
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		while(!done) sleep(1);
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
					     evsel_list->threads->nr);
		}
	}

	return WEXITSTATUS(status);
}

562 563 564 565 566 567 568
static void print_noise_pct(double total, double avg)
{
	double pct = 0.0;

	if (avg)
		pct = 100.0*total/avg;

569
	if (csv_output)
570
		fprintf(output, "%s%.2f%%", csv_sep, pct);
571
	else if (pct)
572
		fprintf(output, "  ( +-%6.2f%% )", pct);
573 574
}

575
static void print_noise(struct perf_evsel *evsel, double avg)
576
{
577 578
	struct perf_stat *ps;

579 580 581
	if (run_count == 1)
		return;

582
	ps = evsel->priv;
583
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
584 585
}

586
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
I
Ingo Molnar 已提交
587
{
588
	double msecs = avg / 1e6;
S
Stephane Eranian 已提交
589
	char cpustr[16] = { '\0', };
590
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
I
Ingo Molnar 已提交
591

592
	if (no_aggr)
S
Stephane Eranian 已提交
593 594
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
595
			evsel_list->cpus->map[cpu], csv_sep);
S
Stephane Eranian 已提交
596

597
	fprintf(output, fmt, cpustr, msecs, csv_sep, event_name(evsel));
S
Stephane Eranian 已提交
598

S
Stephane Eranian 已提交
599
	if (evsel->cgrp)
600
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
S
Stephane Eranian 已提交
601

S
Stephane Eranian 已提交
602 603
	if (csv_output)
		return;
I
Ingo Molnar 已提交
604

605
	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
606 607
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
608 609
	else
		fprintf(output, "                                   ");
I
Ingo Molnar 已提交
610 611
}

612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
/*
 * Ratio classes understood by get_ratio_color(); each value selects one
 * row of that function's threshold table.  GRC_MAX_NR is the number of
 * classes, not a class itself.
 */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

/*
 * Pick the colour used to print @ratio (a percentage) for ratio class
 * @type.  Each class has three descending thresholds; the first one the
 * ratio exceeds selects red, magenta or yellow respectively.  A ratio at
 * or below the lowest threshold is printed in the normal colour.
 */
static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};
	/* Colours in the same order as the threshold columns above. */
	const char *severity[3] = {
		PERF_COLOR_RED,
		PERF_COLOR_MAGENTA,
		PERF_COLOR_YELLOW,
	};
	int level;

	for (level = 0; level < 3; level++) {
		if (ratio > grc_table[type][level])
			return severity[level];
	}

	return PERF_COLOR_NORMAL;
}

639 640 641 642 643 644 645 646 647 648
static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

649
	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
650

651 652 653
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
654 655 656
}

static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
657 658 659 660 661 662 663 664 665
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

666
	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
667

668 669 670
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
671 672
}

673 674 675 676 677 678 679 680 681 682
static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

683
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
684

685 686 687
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
688 689
}

690 691 692 693 694 695 696 697 698 699
static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

700
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
701

702 703 704
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
705 706
}

707 708 709 710 711 712 713 714 715 716
static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

717
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
718

719 720 721
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
722 723 724 725 726 727 728 729 730 731 732 733
}

static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

734
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
735

736 737 738
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
739 740 741 742 743 744 745 746 747 748 749 750
}

static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

751
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
752

753 754 755
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
756 757 758 759 760 761 762 763 764 765 766 767
}

static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

768
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
769

770 771 772
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
773 774
}

775
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
I
Ingo Molnar 已提交
776
{
777
	double total, ratio = 0.0;
778
	char cpustr[16] = { '\0', };
S
Stephane Eranian 已提交
779 780 781 782 783
	const char *fmt;

	if (csv_output)
		fmt = "%s%.0f%s%s";
	else if (big_num)
784
		fmt = "%s%'18.0f%s%-25s";
S
Stephane Eranian 已提交
785
	else
786
		fmt = "%s%18.0f%s%-25s";
787 788

	if (no_aggr)
S
Stephane Eranian 已提交
789 790
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
791
			evsel_list->cpus->map[cpu], csv_sep);
792 793
	else
		cpu = 0;
794

795
	fprintf(output, fmt, cpustr, avg, csv_sep, event_name(evsel));
S
Stephane Eranian 已提交
796

S
Stephane Eranian 已提交
797
	if (evsel->cgrp)
798
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
S
Stephane Eranian 已提交
799

S
Stephane Eranian 已提交
800 801
	if (csv_output)
		return;
I
Ingo Molnar 已提交
802

803
	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
804
		total = avg_stats(&runtime_cycles_stats[cpu]);
805 806 807 808

		if (total)
			ratio = avg / total;

809
		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
810

811 812
		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
813 814 815

		if (total && avg) {
			ratio = total / avg;
816
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
817 818
		}

819
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
820
			runtime_branches_stats[cpu].n != 0) {
821
		print_branch_misses(cpu, evsel, avg);
822 823 824 825 826
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
827
			runtime_l1_dcache_stats[cpu].n != 0) {
828
		print_l1_dcache_misses(cpu, evsel, avg);
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
857 858 859 860 861 862 863
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

864
		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
865

866 867
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
868
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
869
		print_stalled_cycles_backend(cpu, evsel, avg);
870
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
871
		total = avg_stats(&runtime_nsecs_stats[cpu]);
872 873

		if (total)
874
			ratio = 1.0 * avg / total;
875

876
		fprintf(output, " # %8.3f GHz                    ", ratio);
877
	} else if (runtime_nsecs_stats[cpu].n != 0) {
N
Namhyung Kim 已提交
878 879
		char unit = 'M';

880
		total = avg_stats(&runtime_nsecs_stats[cpu]);
881 882

		if (total)
883
			ratio = 1000.0 * avg / total;
N
Namhyung Kim 已提交
884 885 886 887
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
888

N
Namhyung Kim 已提交
889
		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
890
	} else {
891
		fprintf(output, "                                   ");
I
Ingo Molnar 已提交
892 893 894
	}
}

895 896
/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
899
static void print_counter_aggr(struct perf_evsel *counter)
900
{
901 902
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
903
	int scaled = counter->counts->scaled;
904 905

	if (scaled == -1) {
906
		fprintf(output, "%*s%s%*s",
S
Stephane Eranian 已提交
907
			csv_output ? 0 : 18,
908
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
S
Stephane Eranian 已提交
909 910 911 912 913
			csv_sep,
			csv_output ? 0 : -24,
			event_name(counter));

		if (counter->cgrp)
914
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
915

916
		fputc('\n', output);
917 918
		return;
	}
919

I
Ingo Molnar 已提交
920
	if (nsec_counter(counter))
921
		nsec_printout(-1, counter, avg);
I
Ingo Molnar 已提交
922
	else
923
		abs_printout(-1, counter, avg);
924

925 926
	print_noise(counter, avg);

S
Stephane Eranian 已提交
927
	if (csv_output) {
928
		fputc('\n', output);
S
Stephane Eranian 已提交
929 930 931
		return;
	}

932 933 934
	if (scaled) {
		double avg_enabled, avg_running;

935 936
		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);
937

938
		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
939
	}
940
	fprintf(output, "\n");
941 942
}

943 944 945 946
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
947
static void print_counter(struct perf_evsel *counter)
948 949 950 951
{
	u64 ena, run, val;
	int cpu;

952
	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
953 954 955
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;
956
		if (run == 0 || ena == 0) {
957
			fprintf(output, "CPU%*d%s%*s%s%*s",
S
Stephane Eranian 已提交
958
				csv_output ? 0 : -4,
959
				evsel_list->cpus->map[cpu], csv_sep,
S
Stephane Eranian 已提交
960
				csv_output ? 0 : 18,
961 962
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
S
Stephane Eranian 已提交
963
				csv_output ? 0 : -24,
S
Stephane Eranian 已提交
964
				event_name(counter));
965

S
Stephane Eranian 已提交
966
			if (counter->cgrp)
967 968
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
969

970
			fputc('\n', output);
971 972 973 974 975 976 977 978
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, counter, val);
		else
			abs_printout(cpu, counter, val);

S
Stephane Eranian 已提交
979 980
		if (!csv_output) {
			print_noise(counter, 1.0);
981

982
			if (run != ena)
983 984
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
985
		}
986
		fputc('\n', output);
987 988 989
	}
}

990 991
static void print_stat(int argc, const char **argv)
{
992 993
	struct perf_evsel *counter;
	int i;
994

995 996
	fflush(stdout);

S
Stephane Eranian 已提交
997
	if (!csv_output) {
998 999
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
1000
		if (!perf_target__has_task(&target)) {
1001
			fprintf(output, "\'%s", argv[0]);
S
Stephane Eranian 已提交
1002
			for (i = 1; i < argc; i++)
1003
				fprintf(output, " %s", argv[i]);
1004 1005
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
S
Stephane Eranian 已提交
1006
		else
1007
			fprintf(output, "thread id \'%s", target.tid);
I
Ingo Molnar 已提交
1008

1009
		fprintf(output, "\'");
S
Stephane Eranian 已提交
1010
		if (run_count > 1)
1011 1012
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
S
Stephane Eranian 已提交
1013
	}
1014

1015
	if (no_aggr) {
1016
		list_for_each_entry(counter, &evsel_list->entries, node)
1017 1018
			print_counter(counter);
	} else {
1019
		list_for_each_entry(counter, &evsel_list->entries, node)
1020 1021
			print_counter_aggr(counter);
	}
1022

S
Stephane Eranian 已提交
1023
	if (!csv_output) {
1024
		if (!null_run)
1025 1026
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
S
Stephane Eranian 已提交
1027 1028
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
1029
			fprintf(output, "                                        ");
1030 1031
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
S
Stephane Eranian 已提交
1032
		}
1033
		fprintf(output, "\n\n");
I
Ingo Molnar 已提交
1034
	}
1035 1036
}

1037 1038
/*
 * Number of the last signal recorded by skip_signal(), or -1 if none;
 * sig_atexit() reads it to decide whether to re-raise that signal on exit.
 */
static volatile int signr = -1;

1039
static void skip_signal(int signo)
1040
{
1041
	if(child_pid == -1)
1042 1043
		done = 1;

1044 1045 1046 1047 1048
	signr = signo;
}

static void sig_atexit(void)
{
1049 1050 1051
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

1052 1053 1054 1055 1056
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
1057 1058 1059
}

/* NULL-terminated usage strings passed to the option parser and usage printer. */
static const char * const stat_usage[] = {
	"perf stat [<options>] [<command>]",
	NULL
};

S
Stephane Eranian 已提交
1064 1065 1066 1067 1068 1069 1070
/*
 * Option callback for -B/--big-num: stores 1 in big_num_opt when the
 * option is given and 0 when it is unset. Always returns 0.
 */
static int stat__set_big_num(const struct option *opt __used,
			     const char *s __used, int unset)
{
	if (unset)
		big_num_opt = 0;
	else
		big_num_opt = 1;

	return 0;
}

1071 1072
/* Set by --append: cmd_stat() opens the output stream with "a" instead of "w". */
static bool append_file;

1073
static const struct option options[] = {
1074
	OPT_CALLBACK('e', "event", &evsel_list, "event",
1075
		     "event selector. use 'perf list' to list available events",
1076
		     parse_events_option),
1077 1078
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
1079 1080
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
1081
	OPT_STRING('p', "pid", &target.pid, "pid",
1082
		   "stat events on existing process id"),
1083
	OPT_STRING('t', "tid", &target.tid, "tid",
1084
		   "stat events on existing thread id"),
1085
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1086
		    "system-wide collection from all CPUs"),
1087 1088
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
1089
	OPT_BOOLEAN('c', "scale", &scale,
1090
		    "scale/normalize counters"),
1091
	OPT_INCR('v', "verbose", &verbose,
1092
		    "be more verbose (show counter open errors, etc)"),
1093 1094
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100)"),
1095 1096
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
1097
	OPT_INCR('d', "detailed", &detailed_run,
1098
		    "detailed run - start a lot of events"),
I
Ingo Molnar 已提交
1099 1100
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
S
Stephane Eranian 已提交
1101 1102 1103
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
1104
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1105
		    "list of cpus to monitor in system-wide"),
1106 1107
	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
		    "disable CPU count aggregation"),
S
Stephane Eranian 已提交
1108 1109
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
S
Stephane Eranian 已提交
1110 1111 1112
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
1113 1114 1115
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1116 1117
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
1118 1119 1120
	OPT_END()
};

1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
1132
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1133
			return -1;
1134 1135 1136 1137 1138 1139 1140 1141
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
1142
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1143
		return -1;
1144 1145 1146 1147 1148

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
1149
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1150
		return -1;
1151 1152 1153 1154 1155

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
1156
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1157 1158
}

1159
/*
 * Entry point of the 'perf stat' builtin.
 *
 * Parses the command line, selects the output stream (stderr, the file
 * given with --output, or the descriptor given with --log-fd), validates
 * option combinations, adds default events, creates the CPU/thread maps
 * and per-event stat storage, installs the signal handlers and then runs
 * the measurement run_count times. The report is printed unless the last
 * run returned -1.
 *
 * Returns the status of the last run, or a negative value when setup
 * fails (-ENOMEM for allocation failures, -errno when fdopen() fails).
 */
int cmd_stat(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;
	const char *mode;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	/* Default to stderr; "-o -" keeps stderr as well. */
	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		usage_with_options(stat_usage, options);
	}
	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		usage_with_options(stat_usage, options);
	}
	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			exit(-1);
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0 && output_fd != 2) {
		/*
		 * output_fd == 0 means --log-fd was not given (the conflict
		 * check above relies on the same convention), so do not wrap
		 * fd 0 in a write stream; fd 2 is already 'stderr'.
		 */
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			/* Capture errno before perror() has a chance to change it. */
			status = -errno;
			perror("Failed opening logfd");
			goto out;
		}
	}

	if (csv_sep) {
		csv_output = true;
		/* Accept a literal backslash-t on the command line as a TAB. */
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			usage_with_options(stat_usage, options);
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && !perf_target__has_task(&target))
		usage_with_options(stat_usage, options);
	if (run_count <= 0)
		usage_with_options(stat_usage, options);

	/* no_aggr, cgroup are for system-wide only */
	if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		usage_with_options(stat_usage, options);
	}

	if (add_default_attributes())
		goto out;

	perf_target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (perf_target__has_task(&target))
			pr_err("Problems finding threads of monitor\n");
		if (perf_target__has_cpu(&target))
			perror("failed to parse CPUs map");

		usage_with_options(stat_usage, options);
		return -1;
	}

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
			goto out_free_fd;
	}

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		if (sync_run)
			sync();

		status = run_perf_stat(argc, argv);
	}

	if (status != -1)
		print_stat(argc, argv);
out_free_fd:
	list_for_each_entry(pos, &evsel_list->entries, node)
		perf_evsel__free_stat_priv(pos);
	perf_evlist__delete_maps(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}