/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>
#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid	= UINT_MAX,
};

typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

static int			run_count			=  1;
static bool			no_inherit			= false;
static volatile pid_t		child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			transaction_run;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		initial_delay			= 0;
static unsigned int		unit_width			= 4; /* strlen("unit") */
static bool			forever				= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static aggr_get_id_t		aggr_get_id;
static bool			append_file;
static const char		*output_name;
static int			output_fd;

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};

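/*
 * r = a - b for two timespecs, borrowing a second when the
 * nanosecond difference would go negative.
 */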
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}

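/*
 * Configure one counter for counting (scaling, inheritance, no
 * sampling) and open it per-cpu or per-thread, depending on the target.
 */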
static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (stat_config.scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;
	attr->sample_type   = 0;

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;
		if (!initial_delay)
			attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count))
				return -1;
		}
	}

	return 0;
}

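/*
 * Read and process every counter in the event list; close_counters is
 * set for the final read at the end of a run.
 */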
static void read_counters(bool close_counters)
{
	struct perf_evsel *counter;

	evlist__for_each(evsel_list, counter) {
		if (read_counter(counter))
			pr_debug("failed to read counter %s\n", counter->name);

		if (perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);

		if (close_counters) {
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	}
}

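/*
 * Interval mode (-I): read all counters and print one snapshot,
 * timestamped relative to the start of the run.
 */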
static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters(false);

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	print_counters(&rs, 0, NULL);
}

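/* Honour -D/--delay: wait the requested time before enabling the events. */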
static void handle_initial_delay(void)
{
	if (initial_delay) {
		usleep(initial_delay * 1000);
		perf_evlist__enable(evsel_list);
	}
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

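/*
 * Open all counters, start (or attach to) the workload, and collect
 * the counts; in interval mode, print periodic snapshots along the way.
 */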
static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);

	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each(evsel_list, counter) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			strerror_r(errno, msg, sizeof(msg)));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		handle_initial_delay();

		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		wait(&status);

		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		handle_initial_delay();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	read_counters(true);

	return WEXITSTATUS(status);
}

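/* Wrap one measurement run with the optional --pre/--post commands and --sync. */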
static int run_perf_stat(int argc, const char **argv)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

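/*
 * Show for how much of the enabled time the counter was actually
 * running, which flags multiplexed (scaled) counts.
 */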
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
					csv_sep,
					run,
					csv_sep,
					ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
	}
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
}

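/* With -r N, print the run-to-run deviation of a counter as +- percent. */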
static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat_evsel *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

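/* Print the leading aggregation column (socket, core, CPU or thread). */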
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(stat_config.output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_NONE:
		fprintf(stat_config.output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		fprintf(stat_config.output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	case AGGR_UNSET:
	default:
		break;
	}
}

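/* Print a time-based counter (task-clock/cpu-clock), converted to msec. */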
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double msecs = avg / 1e6;
	const char *fmt_v, *fmt_n;
	char name[25];

	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

	aggr_printout(evsel, id, nr);

	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}

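/* Print a plain event count, formatted according to --big-num and -x. */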
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double sc = evsel->scale;
	const char *fmt;

	if (csv_output) {
		fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}

	aggr_printout(evsel, id, nr);

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}

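/*
 * Print one counter value via the nsec or plain printer, then append
 * the derived "shadow" metrics (IPC, stall ratios, ...).
 */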
static void printout(int id, int nr, struct perf_evsel *counter, double uval)
{
	int cpu = cpu_map__id_to_cpu(id);

	if (stat_config.aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);

	if (!csv_output && !stat_config.interval)
		perf_stat__print_shadow_stats(stat_config.output, counter,
					      uval, cpu,
					      stat_config.aggr_mode);
}

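/*
 * Print one line per aggregation unit (socket or core) and event,
 * summing the counts of the CPUs that map to that unit.
 */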
static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int cpu, s, s2, id, nr;
	double uval;
	u64 ena, run, val;

	if (!(aggr_map || aggr_get_id))
		return;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		evlist__for_each(evsel_list, counter) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
				if (s2 != id)
					continue;
				val += perf_counts(counter->counts, cpu, 0)->val;
				ena += perf_counts(counter->counts, cpu, 0)->ena;
				run += perf_counts(counter->counts, cpu, 0)->run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				aggr_printout(counter, id, nr);

				fprintf(output, "%*s%s",
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));

				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				print_running(run, ena);
				fputc('\n', output);
				continue;
			}
			uval = val * counter->scale;
			printout(id, nr, counter, uval);
			if (!csv_output)
				print_noise(counter, 1.0);

			print_running(run, ena);
			fputc('\n', output);
		}
	}
}

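/* --per-thread: print per-thread totals, summed across CPUs. */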
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;
		printout(thread, 0, counter, uval);

		if (!csv_output)
			print_noise(counter, 1.0);

		print_running(run, ena);
		fputc('\n', output);
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_stat_evsel *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;
	double uval;
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);

	if (prefix)
		fprintf(output, "%s", prefix);

	if (scaled == -1 || !counter->supported) {
		fprintf(output, "%*s%s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep);
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);
		fprintf(output, "%*s",
			csv_output ? 0 : -25,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		print_running(avg_running, avg_enabled);
		fputc('\n', output);
		return;
	}

	uval = avg * counter->scale;
	printout(-1, 0, counter, uval);

	print_noise(counter, avg);

	print_running(avg_running, avg_enabled);
	fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	u64 ena, run, val;
	double uval;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = perf_counts(counter->counts, cpu, 0)->val;
		ena = perf_counts(counter->counts, cpu, 0)->ena;
		run = perf_counts(counter->counts, cpu, 0)->run;

		if (prefix)
			fprintf(output, "%s", prefix);

		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s",
				csv_output ? 0 : -4,
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep);

			fprintf(output, "%-*s%s",
				csv_output ? 0 : unit_width,
				counter->unit, csv_sep);

			fprintf(output, "%*s",
				csv_output ? 0 : -25,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			print_running(run, ena);
			fputc('\n', output);
			continue;
		}

		uval = val * counter->scale;
		printout(cpu, 0, counter, uval);
		if (!csv_output)
			print_noise(counter, 1.0);
		print_running(run, ena);

		fputc('\n', output);
	}
}

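/*
 * Interval mode: build the timestamp prefix and reprint the column
 * header every 25 intervals.
 */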
static void print_interval(char *prefix, struct timespec *ts)
{
	FILE *output = stat_config.output;
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		switch (stat_config.aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
		case AGGR_UNSET:
			break;
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
{
	FILE *output = stat_config.output;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
		else if (!target__has_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}
}

static void print_footer(void)
{
	FILE *output = stat_config.output;

	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
			avg_stats(&walltime_nsecs_stats)/1e9);
	if (run_count > 1) {
		fprintf(output, "                                        ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	int interval = stat_config.interval;
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);

	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
	case AGGR_GLOBAL:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, prefix);
		break;
	case AGGR_NONE:
		evlist__for_each(evsel_list, counter)
			print_counter(counter, prefix);
		break;
	case AGGR_UNSET:
	default:
		break;
	}

	if (!interval && !csv_output)
		print_footer();

	fflush(stat_config.output);
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
}

static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		    "print counts at regular interval in ms (>= 10)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
};

static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

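/*
 * Aggregation id lookups: cpus_aggr_map caches the cpu -> socket/core
 * id translation so it is computed only once per cpu; cpu_map__get_max()
 * determines how large that cache has to be.
 */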
static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static struct cpu_map *cpus_aggr_map;

static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}

static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}

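/*
 * Build the aggregation map for --per-socket/--per-core and pick the
 * matching cached id lookup helper.
 */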
static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs, NULL);
		else
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

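/*
 * Entry point of 'perf stat': parse the options, set up the output
 * stream and event list, then run the workload run_count times.
 */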
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, stat_options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	interval = stat_config.interval;

	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && target__none(&target))
		usage_with_options(stat_usage, stat_options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
			parse_options_usage(stat_usage, stat_options, "I", 1);
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}