/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

44
#include "perf.h"
45
#include "builtin.h"
46
#include "util/cgroup.h"
47
#include "util/util.h"
48 49
#include "util/parse-options.h"
#include "util/parse-events.h"
50
#include "util/pmu.h"
51
#include "util/event.h"
52
#include "util/evlist.h"
53
#include "util/evsel.h"
54
#include "util/debug.h"
55
#include "util/color.h"
56
#include "util/stat.h"
57
#include "util/header.h"
58
#include "util/cpumap.h"
59
#include "util/thread.h"
60
#include "util/thread_map.h"
61
#include "util/counts.h"
62

63
#include <stdlib.h>
64
#include <sys/prctl.h>
65
#include <locale.h>
66

/* Field separator used for normal (non-CSV) output. */
#define DEFAULT_SEPARATOR	" "
/* Placeholders printed when a counter could not be opened or read. */
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

97
static struct perf_evlist	*evsel_list;
98

99
static struct target target = {
100 101
	.uid	= UINT_MAX,
};
102

103 104
typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

105
static int			run_count			=  1;
106
static bool			no_inherit			= false;
107
static volatile pid_t		child_pid			= -1;
108
static bool			null_run			=  false;
109
static int			detailed_run			=  0;
110
static bool			transaction_run;
111
static bool			big_num				=  true;
S
Stephane Eranian 已提交
112 113 114
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
115
static bool			group				= false;
116 117 118
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
119
static unsigned int		initial_delay			= 0;
120
static unsigned int		unit_width			= 4; /* strlen("unit") */
121
static bool			forever				= false;
122
static struct timespec		ref_time;
123
static struct cpu_map		*aggr_map;
124
static aggr_get_id_t		aggr_get_id;
J
Jiri Olsa 已提交
125 126 127
static bool			append_file;
static const char		*output_name;
static int			output_fd;
128

129 130
static volatile int done = 0;

131 132
static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
133
	.scale		= true,
134 135
};

/* r = a - b, assuming a >= b; normalizes tv_nsec with a borrow. */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec >= b->tv_nsec) {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	} else {
		/* borrow one second worth of nanoseconds */
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	}
}

148 149 150
static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
151
	perf_stat__reset_shadow_stats();
152 153
}

154
static int create_perf_stat_counter(struct perf_evsel *evsel)
155
{
156
	struct perf_event_attr *attr = &evsel->attr;
157

158
	if (stat_config.scale)
159 160
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
161

162 163
	attr->inherit = !no_inherit;

164 165 166 167 168 169 170
	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;
	attr->sample_type   = 0;

J
Jiri Olsa 已提交
171 172 173 174 175 176 177 178
	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel))
		attr->disabled = 1;

179
	if (target__has_cpu(&target))
180
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
181

182
	if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
183 184
		if (!initial_delay)
			attr->enable_on_exec = 1;
185
	}
186

187
	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
188 189
}

190 191 192
/*
 * Does the counter have nsecs as a unit?
 */
193
static inline int nsec_counter(struct perf_evsel *evsel)
194
{
195 196
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
197 198 199 200 201
		return 1;

	return 0;
}

202 203 204 205
/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
206
static int read_counter(struct perf_evsel *counter)
207
{
208 209 210
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;
211

212 213 214
	if (!counter->supported)
		return -ENOENT;

215 216 217 218 219
	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
220 221 222 223
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count))
224 225
				return -1;
		}
226
	}
227 228

	return 0;
229 230
}

231
static void read_counters(bool close_counters)
232 233 234
{
	struct perf_evsel *counter;

235
	evlist__for_each(evsel_list, counter) {
236
		if (read_counter(counter))
237
			pr_debug("failed to read counter %s\n", counter->name);
238

239
		if (perf_stat_process_counter(&stat_config, counter))
240
			pr_warning("failed to process counter %s\n", counter->name);
241

242
		if (close_counters) {
243 244
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
245 246
		}
	}
247 248
}

249
static void process_interval(void)
250 251 252 253
{
	struct timespec ts, rs;

	read_counters(false);
254

255 256 257
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

258
	print_counters(&rs, 0, NULL);
259 260
}

J
Jiri Olsa 已提交
261
static void enable_counters(void)
262
{
J
Jiri Olsa 已提交
263
	if (initial_delay)
264
		usleep(initial_delay * 1000);
J
Jiri Olsa 已提交
265 266 267 268 269 270 271

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || initial_delay)
272
		perf_evlist__enable(evsel_list);
273 274
}

275
static volatile int workload_exec_errno;
276 277 278 279 280 281

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
282 283
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
284
{
285
	workload_exec_errno = info->si_value.sival_int;
286 287
}

288
static int __run_perf_stat(int argc, const char **argv)
289
{
290
	int interval = stat_config.interval;
291
	char msg[512];
292
	unsigned long long t0, t1;
293
	struct perf_evsel *counter;
294
	struct timespec ts;
295
	size_t l;
296
	int status = 0;
297
	const bool forks = (argc > 0);
298

299 300 301 302 303 304 305 306
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

307
	if (forks) {
308 309
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
						  workload_exec_failed_signal) < 0) {
310 311
			perror("failed to prepare workload");
			return -1;
312
		}
313
		child_pid = evsel_list->workload.pid;
314 315
	}

316
	if (group)
317
		perf_evlist__set_leader(evsel_list);
318

319
	evlist__for_each(evsel_list, counter) {
320
		if (create_perf_stat_counter(counter) < 0) {
321 322 323 324
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
325
			if (errno == EINVAL || errno == ENOSYS ||
326 327
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
328 329
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
330
						    perf_evsel__name(counter));
331
				counter->supported = false;
332 333 334 335

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
336
			}
337

338 339 340 341
			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

342 343
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
344

345 346
			return -1;
		}
347
		counter->supported = true;
348 349 350 351

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;
352
	}
353

354 355 356
	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
357
			strerror_r(errno, msg, sizeof(msg)));
358 359 360
		return -1;
	}

361 362 363 364
	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
365
	clock_gettime(CLOCK_MONOTONIC, &ref_time);
366

367
	if (forks) {
368
		perf_evlist__start_workload(evsel_list);
J
Jiri Olsa 已提交
369
		enable_counters();
370

371 372 373
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
374
				process_interval();
375 376
			}
		}
377
		wait(&status);
378

379 380 381
		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
382
			return -1;
383
		}
384

385 386
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
387
	} else {
J
Jiri Olsa 已提交
388
		enable_counters();
389 390 391
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
392
				process_interval();
393
		}
394
	}
395 396 397

	t1 = rdclock();

398
	update_stats(&walltime_nsecs_stats, t1 - t0);
399

400
	read_counters(true);
401

402 403 404
	return WEXITSTATUS(status);
}

405
static int run_perf_stat(int argc, const char **argv)
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

431 432 433
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
434
		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
435 436 437 438 439
					csv_sep,
					run,
					csv_sep,
					ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
440
		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
441 442 443
	}
}

444 445
static void print_noise_pct(double total, double avg)
{
446
	double pct = rel_stddev_stats(total, avg);
447

448
	if (csv_output)
449
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
450
	else if (pct)
451
		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
452 453
}

454
static void print_noise(struct perf_evsel *evsel, double avg)
455
{
456
	struct perf_stat_evsel *ps;
457

458 459 460
	if (run_count == 1)
		return;

461
	ps = evsel->priv;
462
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
463 464
}

465
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
I
Ingo Molnar 已提交
466
{
467
	switch (stat_config.aggr_mode) {
468
	case AGGR_CORE:
469
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
470 471 472 473 474 475 476 477
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
478
	case AGGR_SOCKET:
479
		fprintf(stat_config.output, "S%*d%s%*d%s",
480
			csv_output ? 0 : -5,
481
			id,
482 483 484 485
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
486 487
			break;
	case AGGR_NONE:
488
		fprintf(stat_config.output, "CPU%*d%s",
S
Stephane Eranian 已提交
489
			csv_output ? 0 : -4,
490
			perf_evsel__cpus(evsel)->map[id], csv_sep);
491
		break;
492
	case AGGR_THREAD:
493
		fprintf(stat_config.output, "%*s-%*d%s",
494 495 496 497 498 499
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
500
	case AGGR_GLOBAL:
J
Jiri Olsa 已提交
501
	case AGGR_UNSET:
502 503 504 505 506
	default:
		break;
	}
}

507
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
508
{
509
	FILE *output = stat_config.output;
510
	double msecs = avg / 1e6;
511
	const char *fmt_v, *fmt_n;
512
	char name[25];
513

514 515 516
	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

517
	aggr_printout(evsel, id, nr);
S
Stephane Eranian 已提交
518

519 520
	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
521 522 523 524 525 526 527 528 529

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);
S
Stephane Eranian 已提交
530

S
Stephane Eranian 已提交
531
	if (evsel->cgrp)
532
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
I
Ingo Molnar 已提交
533 534
}

535 536
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
537
	FILE *output = stat_config.output;
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
	double sc =  evsel->scale;
	const char *fmt;

	if (csv_output) {
		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}

	aggr_printout(evsel, id, nr);

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
563
}
564

565 566 567 568 569 570 571 572 573 574 575
static void printout(int id, int nr, struct perf_evsel *counter, double uval)
{
	int cpu = cpu_map__id_to_cpu(id);

	if (stat_config.aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);
576

577 578 579 580
	if (!csv_output && !stat_config.interval)
		perf_stat__print_shadow_stats(stat_config.output, counter,
					      uval, cpu,
					      stat_config.aggr_mode);
581 582
}

583
static void print_aggr(char *prefix)
584
{
585
	FILE *output = stat_config.output;
586
	struct perf_evsel *counter;
587
	int cpu, s, s2, id, nr;
588
	double uval;
589 590
	u64 ena, run, val;

591
	if (!(aggr_map || aggr_get_id))
592 593
		return;

594 595
	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
596
		evlist__for_each(evsel_list, counter) {
597 598 599
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
600
				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
601
				if (s2 != id)
602
					continue;
603 604 605
				val += perf_counts(counter->counts, cpu, 0)->val;
				ena += perf_counts(counter->counts, cpu, 0)->ena;
				run += perf_counts(counter->counts, cpu, 0)->run;
606 607 608 609 610 611
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
612
				aggr_printout(counter, id, nr);
613

614
				fprintf(output, "%*s%s",
615 616
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
617 618 619 620 621 622 623 624
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
625
					perf_evsel__name(counter));
626

627 628 629 630
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

631
				print_running(run, ena);
632 633 634
				fputc('\n', output);
				continue;
			}
635
			uval = val * counter->scale;
636
			printout(id, nr, counter, uval);
637
			if (!csv_output)
638 639
				print_noise(counter, 1.0);

640
			print_running(run, ena);
641 642 643 644 645
			fputc('\n', output);
		}
	}
}

646 647
static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
648
	FILE *output = stat_config.output;
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;
667
		printout(thread, 0, counter, uval);
668 669 670 671 672 673 674 675 676

		if (!csv_output)
			print_noise(counter, 1.0);

		print_running(run, ena);
		fputc('\n', output);
	}
}

677 678
/*
 * Print out the results of a single counter:
679
 * aggregated counts in system-wide mode
680
 */
681
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
682
{
683
	FILE *output = stat_config.output;
684
	struct perf_stat_evsel *ps = counter->priv;
685
	double avg = avg_stats(&ps->res_stats[0]);
686
	int scaled = counter->counts->scaled;
687
	double uval;
688 689 690 691
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);
692

693 694 695
	if (prefix)
		fprintf(output, "%s", prefix);

696
	if (scaled == -1 || !counter->supported) {
697
		fprintf(output, "%*s%s",
S
Stephane Eranian 已提交
698
			csv_output ? 0 : 18,
699
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
700 701 702 703 704 705
			csv_sep);
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);
		fprintf(output, "%*s",
			csv_output ? 0 : -25,
706
			perf_evsel__name(counter));
S
Stephane Eranian 已提交
707 708

		if (counter->cgrp)
709
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
710

711
		print_running(avg_running, avg_enabled);
712
		fputc('\n', output);
713 714
		return;
	}
715

716
	uval = avg * counter->scale;
717
	printout(-1, 0, counter, uval);
718

719 720
	print_noise(counter, avg);

721
	print_running(avg_running, avg_enabled);
722
	fprintf(output, "\n");
723 724
}

725 726 727 728
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
729
static void print_counter(struct perf_evsel *counter, char *prefix)
730
{
731
	FILE *output = stat_config.output;
732
	u64 ena, run, val;
733
	double uval;
734 735
	int cpu;

Y
Yan, Zheng 已提交
736
	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
737 738 739
		val = perf_counts(counter->counts, cpu, 0)->val;
		ena = perf_counts(counter->counts, cpu, 0)->ena;
		run = perf_counts(counter->counts, cpu, 0)->run;
740 741 742 743

		if (prefix)
			fprintf(output, "%s", prefix);

744
		if (run == 0 || ena == 0) {
745
			fprintf(output, "CPU%*d%s%*s%s",
S
Stephane Eranian 已提交
746
				csv_output ? 0 : -4,
Y
Yan, Zheng 已提交
747
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
S
Stephane Eranian 已提交
748
				csv_output ? 0 : 18,
749
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
750 751 752 753 754 755 756 757 758
				csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));
759

S
Stephane Eranian 已提交
760
			if (counter->cgrp)
761 762
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
763

764
			print_running(run, ena);
765
			fputc('\n', output);
766 767 768
			continue;
		}

769
		uval = val * counter->scale;
770
		printout(cpu, 0, counter, uval);
771
		if (!csv_output)
S
Stephane Eranian 已提交
772
			print_noise(counter, 1.0);
773
		print_running(run, ena);
774

775
		fputc('\n', output);
776 777 778
	}
}

779 780
static void print_interval(char *prefix, struct timespec *ts)
{
781
	FILE *output = stat_config.output;
782 783 784 785 786
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
787
		switch (stat_config.aggr_mode) {
788 789 790 791 792 793 794 795 796
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
797 798 799
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
			break;
800 801 802
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
J
Jiri Olsa 已提交
803 804
		case AGGR_UNSET:
			break;
805 806 807 808 809 810 811 812
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
813
{
814
	FILE *output = stat_config.output;
815
	int i;
816

817 818
	fflush(stdout);

S
Stephane Eranian 已提交
819
	if (!csv_output) {
820 821
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
822 823 824 825
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
826
		else if (!target__has_task(&target)) {
827
			fprintf(output, "\'%s", argv[0]);
S
Stephane Eranian 已提交
828
			for (i = 1; i < argc; i++)
829
				fprintf(output, " %s", argv[i]);
830 831
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
S
Stephane Eranian 已提交
832
		else
833
			fprintf(output, "thread id \'%s", target.tid);
I
Ingo Molnar 已提交
834

835
		fprintf(output, "\'");
S
Stephane Eranian 已提交
836
		if (run_count > 1)
837 838
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
S
Stephane Eranian 已提交
839
	}
840 841 842 843
}

static void print_footer(void)
{
844 845
	FILE *output = stat_config.output;

846 847 848 849 850 851 852 853 854 855 856 857 858 859
	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
			avg_stats(&walltime_nsecs_stats)/1e9);
	if (run_count > 1) {
		fprintf(output, "                                        ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
860
	int interval = stat_config.interval;
861 862 863 864 865 866 867
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);
868

869
	switch (stat_config.aggr_mode) {
870
	case AGGR_CORE:
871
	case AGGR_SOCKET:
872
		print_aggr(prefix);
873
		break;
874 875 876 877
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
878
	case AGGR_GLOBAL:
879
		evlist__for_each(evsel_list, counter)
880
			print_counter_aggr(counter, prefix);
881 882
		break;
	case AGGR_NONE:
883
		evlist__for_each(evsel_list, counter)
884
			print_counter(counter, prefix);
885
		break;
J
Jiri Olsa 已提交
886
	case AGGR_UNSET:
887 888
	default:
		break;
889
	}
890

891 892 893
	if (!interval && !csv_output)
		print_footer();

894
	fflush(stat_config.output);
895 896
}

897 898
static volatile int signr = -1;

899
static void skip_signal(int signo)
900
{
901
	if ((child_pid == -1) || stat_config.interval)
902 903
		done = 1;

904
	signr = signo;
905 906 907 908 909 910 911
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
912 913 914 915
}

static void sig_atexit(void)
{
916 917 918 919 920 921 922 923 924 925 926 927
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

928 929 930
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

931 932
	sigprocmask(SIG_SETMASK, &oset, NULL);

933 934 935 936 937
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
938 939
}

940 941
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
S
Stephane Eranian 已提交
942 943 944 945 946
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

J
Jiri Olsa 已提交
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007
static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		    "print counts at regular interval in ms (>= 10)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
};

/* Uncached aggregation-id getters used by the caching wrappers below. */
static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static struct cpu_map *cpus_aggr_map;

static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}

static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}

1057 1058
static int perf_stat_init_aggr_mode(void)
{
1059 1060
	int nr;

1061
	switch (stat_config.aggr_mode) {
1062 1063 1064 1065 1066
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
1067
		aggr_get_id = perf_stat__get_socket_cached;
1068
		break;
1069 1070 1071 1072 1073
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
1074
		aggr_get_id = perf_stat__get_core_cached;
1075
		break;
1076 1077
	case AGGR_NONE:
	case AGGR_GLOBAL:
1078
	case AGGR_THREAD:
J
Jiri Olsa 已提交
1079
	case AGGR_UNSET:
1080 1081 1082
	default:
		break;
	}
1083 1084 1085 1086 1087 1088 1089 1090 1091

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
1092 1093
}

1094 1095 1096 1097 1098 1099
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

1207 1208 1209 1210
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

1211 1212 1213 1214
	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
1215
			err = parse_events(evsel_list, transaction_attrs, NULL);
1216
		else
1217 1218
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
1219 1220 1221 1222 1223 1224
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

1225
	if (!evsel_list->nr_entries) {
1226
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1227
			return -1;
1228 1229 1230 1231 1232 1233 1234 1235
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
1236
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1237
		return -1;
1238 1239 1240 1241 1242

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
1243
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1244
		return -1;
1245 1246 1247 1248 1249

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
1250
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1251 1252
}

1253
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1254
{
1255 1256 1257 1258
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
1259
	int status = -EINVAL, run_idx;
1260
	const char *mode;
1261
	FILE *output = stderr;
1262
	unsigned int interval;
1263

1264 1265
	setlocale(LC_ALL, "");

1266
	evsel_list = perf_evlist__new();
1267 1268 1269
	if (evsel_list == NULL)
		return -ENOMEM;

J
Jiri Olsa 已提交
1270
	argc = parse_options(argc, argv, stat_options, stat_usage,
1271
		PARSE_OPT_STOP_AT_NON_OPTION);
S
Stephane Eranian 已提交
1272

1273 1274
	interval = stat_config.interval;

1275 1276 1277
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

1278 1279
	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
J
Jiri Olsa 已提交
1280 1281
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
1282
		goto out;
1283
	}
1284 1285 1286

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
J
Jiri Olsa 已提交
1287
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
1288
		goto out;
1289 1290
	}

1291 1292 1293 1294 1295 1296 1297
	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
1298
			return -1;
1299 1300 1301
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1302
	} else if (output_fd > 0) {
1303 1304 1305 1306 1307 1308
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
1309 1310
	}

1311 1312
	stat_config.output = output;

1313
	if (csv_sep) {
S
Stephane Eranian 已提交
1314
		csv_output = true;
1315 1316 1317
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
S
Stephane Eranian 已提交
1318 1319 1320 1321 1322 1323
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
J
Jim Cromie 已提交
1324
		/* User explicitly passed -B? */
S
Stephane Eranian 已提交
1325 1326
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
J
Jiri Olsa 已提交
1327 1328
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
1329
			goto out;
S
Stephane Eranian 已提交
1330 1331 1332 1333 1334
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

1335
	if (!argc && target__none(&target))
J
Jiri Olsa 已提交
1336
		usage_with_options(stat_usage, stat_options);
1337

1338
	if (run_count < 0) {
1339
		pr_err("Run count must be a positive number\n");
J
Jiri Olsa 已提交
1340
		parse_options_usage(stat_usage, stat_options, "r", 1);
1341
		goto out;
1342 1343 1344 1345
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}
1346

1347
	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1348 1349
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
J
Jiri Olsa 已提交
1350 1351
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
1352 1353 1354 1355 1356 1357 1358
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we dont mix it with cpu mode
	 */
1359 1360
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1361
	    !target__has_cpu(&target)) {
S
Stephane Eranian 已提交
1362 1363 1364
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

J
Jiri Olsa 已提交
1365 1366 1367
		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
1368
		goto out;
1369 1370
	}

1371 1372
	if (add_default_attributes())
		goto out;
1373

1374
	target__validate(&target);
1375

1376
	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1377
		if (target__has_task(&target)) {
1378
			pr_err("Problems finding threads of monitor\n");
J
Jiri Olsa 已提交
1379 1380
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
1381
		} else if (target__has_cpu(&target)) {
1382
			perror("failed to parse CPUs map");
J
Jiri Olsa 已提交
1383 1384
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
1385 1386
		}
		goto out;
1387
	}
1388 1389 1390 1391 1392

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
1393
	if (stat_config.aggr_mode == AGGR_THREAD)
1394 1395
		thread_map__read_comms(evsel_list->threads);

1396
	if (interval && interval < 100) {
1397 1398
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
J
Jiri Olsa 已提交
1399
			parse_options_usage(stat_usage, stat_options, "I", 1);
1400 1401 1402 1403 1404
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
1405
	}
1406

1407
	if (perf_evlist__alloc_stats(evsel_list, interval))
1408
		goto out;
1409

1410
	if (perf_stat_init_aggr_mode())
1411
		goto out;
1412

I
Ingo Molnar 已提交
1413 1414 1415 1416 1417 1418
	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
1419
	atexit(sig_atexit);
1420 1421
	if (!forever)
		signal(SIGINT,  skip_signal);
1422
	signal(SIGCHLD, skip_signal);
I
Ingo Molnar 已提交
1423 1424 1425
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

1426
	status = 0;
1427
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1428
		if (run_count != 1 && verbose)
1429 1430
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);
I
Ingo Molnar 已提交
1431

1432
		status = run_perf_stat(argc, argv);
1433
		if (forever && status != -1) {
1434
			print_counters(NULL, argc, argv);
1435
			perf_stat__reset_stats();
1436
		}
1437 1438
	}

1439
	if (!forever && status != -1 && !interval)
1440
		print_counters(NULL, argc, argv);
1441 1442

	perf_evlist__free_stats(evsel_list);
1443 1444
out:
	perf_evlist__delete(evsel_list);
1445
	return status;
1446
}