builtin-stat.c 34.4 KB
Newer Older
1
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

44
#include "perf.h"
45
#include "builtin.h"
46
#include "util/cgroup.h"
47
#include "util/util.h"
48 49
#include "util/parse-options.h"
#include "util/parse-events.h"
50
#include "util/pmu.h"
51
#include "util/event.h"
52
#include "util/evlist.h"
53
#include "util/evsel.h"
54
#include "util/debug.h"
55
#include "util/color.h"
56
#include "util/stat.h"
57
#include "util/header.h"
58
#include "util/cpumap.h"
59
#include "util/thread.h"
60
#include "util/thread_map.h"
61

62
#include <stdlib.h>
63
#include <sys/prctl.h>
64
#include <locale.h>
65

S
Stephane Eranian 已提交
66
/*
 * Default field separator.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * the fallback value for csv_sep -- confirm in cmd_stat().
 */
#define DEFAULT_SEPARATOR	" "

/*
 * Placeholders printed in the value column when a counter has no usable
 * value: the first when counter->supported is false, the second otherwise.
 */
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
S
Stephane Eranian 已提交
69

70
static void print_counters(struct timespec *ts, int argc, const char **argv);
71

72
/*
 * Default events used for perf stat -T: task-clock on its own, followed by
 * one event group ({...}) holding instructions, cycles and the cpu/.../
 * transaction events. The string is handed to parse_events().
 */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/*
 * More limited version when the CPU does not have all events: same as
 * transaction_attrs but without cpu/el-start/ and cpu/cycles-ct/.
 */
static const char * transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

96
static struct perf_evlist	*evsel_list;
97

98
static struct target target = {
99 100
	.uid	= UINT_MAX,
};
101

102
static int			run_count			=  1;
103
static bool			no_inherit			= false;
104
static volatile pid_t		child_pid			= -1;
105
static bool			null_run			=  false;
106
static int			detailed_run			=  0;
107
static bool			transaction_run;
108
static bool			big_num				=  true;
S
Stephane Eranian 已提交
109 110 111
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
112
static bool			group				= false;
113 114 115
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
116
static unsigned int		initial_delay			= 0;
117
static unsigned int		unit_width			= 4; /* strlen("unit") */
118
static bool			forever				= false;
119
static struct timespec		ref_time;
120 121
static struct cpu_map		*aggr_map;
static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
122

123 124
static volatile int done = 0;

125 126
static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
127
	.scale		= true,
128 129
};

130 131 132 133 134 135 136 137 138 139 140 141
/*
 * Store a - b in *r, keeping r->tv_nsec in [0, 1e9) by borrowing one second
 * when a's nanosecond part is smaller than b's.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	const int borrow = a->tv_nsec < b->tv_nsec;

	r->tv_sec = a->tv_sec - b->tv_sec;
	r->tv_nsec = a->tv_nsec - b->tv_nsec;

	if (!borrow)
		return;

	/* Nanoseconds went negative: take one second from tv_sec. */
	r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
	r->tv_sec--;
}

142 143 144
/* Reset the per-event stats of evsel_list and the shadow stats. */
static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}

148
static int create_perf_stat_counter(struct perf_evsel *evsel)
149
{
150
	struct perf_event_attr *attr = &evsel->attr;
151

152
	if (stat_config.scale)
153 154
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
155

156 157
	attr->inherit = !no_inherit;

158
	if (target__has_cpu(&target))
159
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
160

161
	if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
162
		attr->disabled = 1;
163 164
		if (!initial_delay)
			attr->enable_on_exec = 1;
165
	}
166

167
	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
168 169
}

170 171 172
/*
 * Does the counter have nsecs as a unit?
 */
173
static inline int nsec_counter(struct perf_evsel *evsel)
174
{
175 176
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
177 178 179 180 181
		return 1;

	return 0;
}

182 183 184 185
/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
186
static int read_counter(struct perf_evsel *counter)
187
{
188 189 190
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;
191

192 193 194
	if (!counter->supported)
		return -ENOENT;

195 196 197 198 199
	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
200 201 202 203
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count))
204 205
				return -1;
		}
206
	}
207 208

	return 0;
209 210
}

211
static void read_counters(bool close_counters)
212 213 214
{
	struct perf_evsel *counter;

215
	evlist__for_each(evsel_list, counter) {
216 217 218
		if (read_counter(counter))
			pr_warning("failed to read counter %s\n", counter->name);

219
		if (perf_stat_process_counter(&stat_config, counter))
220
			pr_warning("failed to process counter %s\n", counter->name);
221

222
		if (close_counters) {
223 224
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
225 226
		}
	}
227 228
}

229
static void process_interval(void)
230 231 232 233
{
	struct timespec ts, rs;

	read_counters(false);
234

235 236 237
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

238
	print_counters(&rs, 0, NULL);
239 240
}

241 242 243 244 245 246 247 248 249
static void handle_initial_delay(void)
{
	struct perf_evsel *counter;

	if (initial_delay) {
		const int ncpus = cpu_map__nr(evsel_list->cpus),
			nthreads = thread_map__nr(evsel_list->threads);

		usleep(initial_delay * 1000);
250
		evlist__for_each(evsel_list, counter)
251 252 253 254
			perf_evsel__enable(counter, ncpus, nthreads);
	}
}

255
static volatile int workload_exec_errno;
256 257 258 259 260 261

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
262 263
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
264
{
265
	workload_exec_errno = info->si_value.sival_int;
266 267
}

268
static int __run_perf_stat(int argc, const char **argv)
269
{
270
	int interval = stat_config.interval;
271
	char msg[512];
272
	unsigned long long t0, t1;
273
	struct perf_evsel *counter;
274
	struct timespec ts;
275
	size_t l;
276
	int status = 0;
277
	const bool forks = (argc > 0);
278

279 280 281 282 283 284 285 286
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

287
	if (forks) {
288 289
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
						  workload_exec_failed_signal) < 0) {
290 291
			perror("failed to prepare workload");
			return -1;
292
		}
293
		child_pid = evsel_list->workload.pid;
294 295
	}

296
	if (group)
297
		perf_evlist__set_leader(evsel_list);
298

299
	evlist__for_each(evsel_list, counter) {
300
		if (create_perf_stat_counter(counter) < 0) {
301 302 303 304
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
305
			if (errno == EINVAL || errno == ENOSYS ||
306 307
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
308 309
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
310
						    perf_evsel__name(counter));
311
				counter->supported = false;
312 313 314 315

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
316
			}
317

318 319 320 321
			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

322 323
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
324

325 326
			return -1;
		}
327
		counter->supported = true;
328 329 330 331

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;
332
	}
333

334 335 336
	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
337
			strerror_r(errno, msg, sizeof(msg)));
338 339 340
		return -1;
	}

341 342 343 344
	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
345
	clock_gettime(CLOCK_MONOTONIC, &ref_time);
346

347
	if (forks) {
348
		perf_evlist__start_workload(evsel_list);
349
		handle_initial_delay();
350

351 352 353
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
354
				process_interval();
355 356
			}
		}
357
		wait(&status);
358

359 360 361
		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
362
			return -1;
363
		}
364

365 366
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
367
	} else {
368
		handle_initial_delay();
369 370 371
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
372
				process_interval();
373
		}
374
	}
375 376 377

	t1 = rdclock();

378
	update_stats(&walltime_nsecs_stats, t1 - t0);
379

380
	read_counters(true);
381

382 383 384
	return WEXITSTATUS(status);
}

385
static int run_perf_stat(int argc, const char **argv)
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

411 412 413
/*
 * Print how long the counter was actually running.
 *
 * CSV: always appends "<sep><run><sep><percentage>", using 100.00 when
 * @ena is zero. Human-readable: appends "  (xx.xx%)" only when @run and
 * @ena differ.
 */
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
					csv_sep,
					run,
					csv_sep,
					ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
	}
}

424 425
static void print_noise_pct(double total, double avg)
{
426
	double pct = rel_stddev_stats(total, avg);
427

428
	if (csv_output)
429
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
430
	else if (pct)
431
		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
432 433
}

434
static void print_noise(struct perf_evsel *evsel, double avg)
435
{
436 437
	struct perf_stat *ps;

438 439 440
	if (run_count == 1)
		return;

441
	ps = evsel->priv;
442
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
443 444
}

445
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
I
Ingo Molnar 已提交
446
{
447
	switch (stat_config.aggr_mode) {
448
	case AGGR_CORE:
449
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
450 451 452 453 454 455 456 457
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
458
	case AGGR_SOCKET:
459
		fprintf(stat_config.output, "S%*d%s%*d%s",
460
			csv_output ? 0 : -5,
461
			id,
462 463 464 465
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
466 467
			break;
	case AGGR_NONE:
468
		fprintf(stat_config.output, "CPU%*d%s",
S
Stephane Eranian 已提交
469
			csv_output ? 0 : -4,
470
			perf_evsel__cpus(evsel)->map[id], csv_sep);
471
		break;
472
	case AGGR_THREAD:
473
		fprintf(stat_config.output, "%*s-%*d%s",
474 475 476 477 478 479
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
480 481 482 483 484 485
	case AGGR_GLOBAL:
	default:
		break;
	}
}

486
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
487
{
488
	FILE *output = stat_config.output;
489
	double msecs = avg / 1e6;
490
	const char *fmt_v, *fmt_n;
491
	char name[25];
492

493 494 495
	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

496
	aggr_printout(evsel, id, nr);
S
Stephane Eranian 已提交
497

498 499
	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
500 501 502 503 504 505 506 507 508

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);
S
Stephane Eranian 已提交
509

S
Stephane Eranian 已提交
510
	if (evsel->cgrp)
511
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
S
Stephane Eranian 已提交
512

513
	if (csv_output || stat_config.interval)
S
Stephane Eranian 已提交
514
		return;
I
Ingo Molnar 已提交
515

516
	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
517 518
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
519 520
	else
		fprintf(output, "                                   ");
I
Ingo Molnar 已提交
521 522
}

523 524
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
525
	FILE *output = stat_config.output;
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
	double sc =  evsel->scale;
	const char *fmt;
	int cpu = cpu_map__id_to_cpu(id);

	if (csv_output) {
		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}

	aggr_printout(evsel, id, nr);

541
	if (stat_config.aggr_mode == AGGR_GLOBAL)
542 543 544 545 546 547 548 549 550 551 552 553 554 555
		cpu = 0;

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

556
	if (csv_output || stat_config.interval)
557 558
		return;

559 560
	perf_stat__print_shadow_stats(output, evsel, avg, cpu,
				      stat_config.aggr_mode);
561 562
}

563
static void print_aggr(char *prefix)
564
{
565
	FILE *output = stat_config.output;
566
	struct perf_evsel *counter;
567
	int cpu, cpu2, s, s2, id, nr;
568
	double uval;
569 570
	u64 ena, run, val;

571
	if (!(aggr_map || aggr_get_id))
572 573
		return;

574 575
	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
576
		evlist__for_each(evsel_list, counter) {
577 578 579
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
580 581
				cpu2 = perf_evsel__cpus(counter)->map[cpu];
				s2 = aggr_get_id(evsel_list->cpus, cpu2);
582
				if (s2 != id)
583
					continue;
584 585 586
				val += perf_counts(counter->counts, cpu, 0)->val;
				ena += perf_counts(counter->counts, cpu, 0)->ena;
				run += perf_counts(counter->counts, cpu, 0)->run;
587 588 589 590 591 592
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
593
				aggr_printout(counter, id, nr);
594

595
				fprintf(output, "%*s%s",
596 597
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
598 599 600 601 602 603 604 605
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
606
					perf_evsel__name(counter));
607

608 609 610 611
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

612
				print_running(run, ena);
613 614 615
				fputc('\n', output);
				continue;
			}
616
			uval = val * counter->scale;
617 618

			if (nsec_counter(counter))
619
				nsec_printout(id, nr, counter, uval);
620
			else
621
				abs_printout(id, nr, counter, uval);
622

623
			if (!csv_output)
624 625
				print_noise(counter, 1.0);

626
			print_running(run, ena);
627 628 629 630 631
			fputc('\n', output);
		}
	}
}

632 633
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
634
	FILE *output = stat_config.output;
635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;

		if (nsec_counter(counter))
			nsec_printout(thread, 0, counter, uval);
		else
			abs_printout(thread, 0, counter, uval);

		if (!csv_output)
			print_noise(counter, 1.0);

		print_running(run, ena);
		fputc('\n', output);
	}
}

667 668
/*
 * Print out the results of a single counter:
669
 * aggregated counts in system-wide mode
670
 */
671
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
672
{
673
	FILE *output = stat_config.output;
674 675
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
676
	int scaled = counter->counts->scaled;
677
	double uval;
678 679 680 681
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);
682

683 684 685
	if (prefix)
		fprintf(output, "%s", prefix);

686
	if (scaled == -1 || !counter->supported) {
687
		fprintf(output, "%*s%s",
S
Stephane Eranian 已提交
688
			csv_output ? 0 : 18,
689
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
690 691 692 693 694 695
			csv_sep);
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);
		fprintf(output, "%*s",
			csv_output ? 0 : -25,
696
			perf_evsel__name(counter));
S
Stephane Eranian 已提交
697 698

		if (counter->cgrp)
699
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
700

701
		print_running(avg_running, avg_enabled);
702
		fputc('\n', output);
703 704
		return;
	}
705

706 707
	uval = avg * counter->scale;

I
Ingo Molnar 已提交
708
	if (nsec_counter(counter))
709
		nsec_printout(-1, 0, counter, uval);
I
Ingo Molnar 已提交
710
	else
711
		abs_printout(-1, 0, counter, uval);
712

713 714
	print_noise(counter, avg);

715
	print_running(avg_running, avg_enabled);
716
	fprintf(output, "\n");
717 718
}

719 720 721 722
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
723
static void print_counter(struct perf_evsel *counter, char *prefix)
724
{
725
	FILE *output = stat_config.output;
726
	u64 ena, run, val;
727
	double uval;
728 729
	int cpu;

Y
Yan, Zheng 已提交
730
	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
731 732 733
		val = perf_counts(counter->counts, cpu, 0)->val;
		ena = perf_counts(counter->counts, cpu, 0)->ena;
		run = perf_counts(counter->counts, cpu, 0)->run;
734 735 736 737

		if (prefix)
			fprintf(output, "%s", prefix);

738
		if (run == 0 || ena == 0) {
739
			fprintf(output, "CPU%*d%s%*s%s",
S
Stephane Eranian 已提交
740
				csv_output ? 0 : -4,
Y
Yan, Zheng 已提交
741
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
S
Stephane Eranian 已提交
742
				csv_output ? 0 : 18,
743
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
744 745 746 747 748 749 750 751 752
				csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));
753

S
Stephane Eranian 已提交
754
			if (counter->cgrp)
755 756
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
757

758
			print_running(run, ena);
759
			fputc('\n', output);
760 761 762
			continue;
		}

763 764
		uval = val * counter->scale;

765
		if (nsec_counter(counter))
766
			nsec_printout(cpu, 0, counter, uval);
767
		else
768
			abs_printout(cpu, 0, counter, uval);
769

770
		if (!csv_output)
S
Stephane Eranian 已提交
771
			print_noise(counter, 1.0);
772
		print_running(run, ena);
773

774
		fputc('\n', output);
775 776 777
	}
}

778 779
static void print_interval(char *prefix, struct timespec *ts)
{
780
	FILE *output = stat_config.output;
781 782 783 784 785
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
786
		switch (stat_config.aggr_mode) {
787 788 789 790 791 792 793 794 795
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
796 797 798
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
			break;
799 800 801 802 803 804 805 806 807 808 809
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
810
{
811
	FILE *output = stat_config.output;
812
	int i;
813

814 815
	fflush(stdout);

S
Stephane Eranian 已提交
816
	if (!csv_output) {
817 818
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
819 820 821 822
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
823
		else if (!target__has_task(&target)) {
824
			fprintf(output, "\'%s", argv[0]);
S
Stephane Eranian 已提交
825
			for (i = 1; i < argc; i++)
826
				fprintf(output, " %s", argv[i]);
827 828
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
S
Stephane Eranian 已提交
829
		else
830
			fprintf(output, "thread id \'%s", target.tid);
I
Ingo Molnar 已提交
831

832
		fprintf(output, "\'");
S
Stephane Eranian 已提交
833
		if (run_count > 1)
834 835
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
S
Stephane Eranian 已提交
836
	}
837 838 839 840
}

static void print_footer(void)
{
841 842
	FILE *output = stat_config.output;

843 844 845 846 847 848 849 850 851 852 853 854 855 856
	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
			avg_stats(&walltime_nsecs_stats)/1e9);
	if (run_count > 1) {
		fprintf(output, "                                        ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
857
	int interval = stat_config.interval;
858 859 860 861 862 863 864
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);
865

866
	switch (stat_config.aggr_mode) {
867
	case AGGR_CORE:
868
	case AGGR_SOCKET:
869
		print_aggr(prefix);
870
		break;
871 872 873 874
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
875
	case AGGR_GLOBAL:
876
		evlist__for_each(evsel_list, counter)
877
			print_counter_aggr(counter, prefix);
878 879
		break;
	case AGGR_NONE:
880
		evlist__for_each(evsel_list, counter)
881
			print_counter(counter, prefix);
882 883 884
		break;
	default:
		break;
885
	}
886

887 888 889
	if (!interval && !csv_output)
		print_footer();

890
	fflush(stat_config.output);
891 892
}

893 894
static volatile int signr = -1;

895
static void skip_signal(int signo)
896
{
897
	if ((child_pid == -1) || stat_config.interval)
898 899
		done = 1;

900
	signr = signo;
901 902 903 904 905 906 907
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
908 909 910 911
}

static void sig_atexit(void)
{
912 913 914 915 916 917 918 919 920 921 922 923
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

924 925 926
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

927 928
	sigprocmask(SIG_SETMASK, &oset, NULL);

929 930 931 932 933
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
934 935
}

936 937
/*
 * Option callback for -B/--big-num: records in big_num_opt whether the
 * option was given (1) or negated (0, @unset non-zero). Always returns 0.
 */
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

943 944
static int perf_stat_init_aggr_mode(void)
{
945
	switch (stat_config.aggr_mode) {
946 947 948 949 950 951 952
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = cpu_map__get_socket;
		break;
953 954 955 956 957 958 959
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = cpu_map__get_core;
		break;
960 961
	case AGGR_NONE:
	case AGGR_GLOBAL:
962
	case AGGR_THREAD:
963 964 965 966 967 968
	default:
		break;
	}
	return 0;
}

969 970 971 972 973 974
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

1082 1083 1084 1085
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

1086 1087 1088 1089
	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
1090
			err = parse_events(evsel_list, transaction_attrs, NULL);
1091
		else
1092 1093
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
1094 1095 1096 1097 1098 1099
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

1100
	if (!evsel_list->nr_entries) {
1101
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1102
			return -1;
1103 1104 1105 1106 1107 1108 1109 1110
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
1111
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1112
		return -1;
1113 1114 1115 1116 1117

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
1118
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1119
		return -1;
1120 1121 1122 1123 1124

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
1125
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1126 1127
}

1128
/*
 * Entry point of the 'perf stat' builtin.
 *
 * Parses the command line, validates the option combination, selects the
 * output stream (stderr by default, a file for --output, or a stream
 * wrapped around the descriptor given with --log-fd), sets up the event
 * list and the target maps, and then calls run_perf_stat() run_count
 * times - or in an endless loop when a repeat count of 0 was requested.
 *
 * @argc, @argv: command line; after option parsing they describe the
 *               workload command that is handed to run_perf_stat().
 * @prefix:      unused.
 *
 * Returns -ENOMEM if the event list cannot be allocated, -EINVAL if
 * option validation or setup fails before the first run, -1 if the
 * output file cannot be opened, -errno if the --log-fd descriptor cannot
 * be turned into a stream, and otherwise the status of the last
 * run_perf_stat() call.
 *
 * Every exit after the event list has been allocated goes through the
 * 'out' label so that the list is always released.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	bool append_file = false;
	int output_fd = 0;
	const char *output_name	= NULL;
	const struct option options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		    "print counts at regular interval in ms (>= 100)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	interval = stat_config.interval;

	/* An output name other than "-" means a file has to be opened below */
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, options, "o", 1);
		parse_options_usage(NULL, options, "log-fd", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		parse_options_usage(stat_usage, options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			/* Leave through 'out' so that evsel_list is released */
			status = -1;
			goto out;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			/* Latch errno first: perror() may change it */
			status = -errno;
			perror("Failed opening logfd");
			goto out;
		}
	}

	stat_config.output = output;

	if (csv_sep) {
		csv_output = true;
		/* Accept a literal backslash-t on the command line as a TAB */
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, options, "B", 1);
			parse_options_usage(NULL, options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	/* Neither a command to run nor a target to attach to */
	if (!argc && target__none(&target))
		usage_with_options(stat_usage, options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		/* A repeat count of 0 selects the endless-loop mode */
		forever = true;
		run_count = 1;
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, options, "p", 1);
		parse_options_usage(NULL, options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, options, "G", 1);
		parse_options_usage(NULL, options, "A", 1);
		parse_options_usage(NULL, options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, options, "p", 1);
			parse_options_usage(NULL, options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, options, "C", 1);
			parse_options_usage(NULL, options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		parse_options_usage(stat_usage, options, "I", 1);
		goto out;
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	/* From here on the stats exist and must be freed on failure too */
	if (perf_stat_init_aggr_mode())
		goto out_free_stats;

	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		/* In endless mode each run is printed and the stats restart */
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

out_free_stats:
	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}