/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

44
#include "perf.h"
45
#include "builtin.h"
46
#include "util/cgroup.h"
47
#include "util/util.h"
48 49
#include "util/parse-options.h"
#include "util/parse-events.h"
50
#include "util/pmu.h"
51
#include "util/event.h"
52
#include "util/evlist.h"
53
#include "util/evsel.h"
54
#include "util/debug.h"
55
#include "util/color.h"
56
#include "util/stat.h"
57
#include "util/header.h"
58
#include "util/cpumap.h"
59
#include "util/thread.h"
60
#include "util/thread_map.h"
61

62
#include <stdlib.h>
63
#include <sys/prctl.h>
64
#include <locale.h>
65

S
Stephane Eranian 已提交
66
/* Field separator and placeholder strings used by the printing code. */
#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
S
Stephane Eranian 已提交
69

70 71 72
/* Forward declarations for printing helpers defined later in this file. */
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix);
74

75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
/*
 * Default events used for perf stat -T: the task clock plus a single
 * braced event group.
 */
static const char * const transaction_attrs[] = {
	[0] = "task-clock",
	[1] = "{instructions,cycles,"
	      "cpu/cycles-t/,cpu/tx-start/,"
	      "cpu/el-start/,cpu/cycles-ct/}",
};

/*
 * Reduced event set for CPUs that do not have all of the events above:
 * the same list without the el-start and cycles-ct events.
 */
static const char * const transaction_limited_attrs[] = {
	[0] = "task-clock",
	[1] = "{instructions,cycles,"
	      "cpu/cycles-t/,cpu/tx-start/}",
};

/*
 * Position of each transaction event, in the order the events appear in
 * transaction_attrs. The leading entries must also match the order of
 * transaction_limited_attrs. These values are passed to nth_evsel().
 */
enum {
	T_TASK_CLOCK = 0,
	T_INSTRUCTIONS,
	T_CYCLES,
	T_CYCLES_IN_TX,
	T_TRANSACTION_START,
	T_ELISION_START,
	T_CYCLES_IN_TX_CP,
};

110
static struct perf_evlist	*evsel_list;
111

112
static struct target target = {
113 114
	.uid	= UINT_MAX,
};
115

116 117 118 119
/* How counts are aggregated when they are read and printed. */
enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
	AGGR_CORE,
};

123
static int			run_count			=  1;
124
static bool			no_inherit			= false;
125
static bool			scale				=  true;
126
static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
127
static volatile pid_t		child_pid			= -1;
128
static bool			null_run			=  false;
129
static int			detailed_run			=  0;
130
static bool			transaction_run;
131
static bool			big_num				=  true;
S
Stephane Eranian 已提交
132 133 134
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
135
static bool			group				= false;
136
static FILE			*output				= NULL;
137 138 139
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
140
static unsigned int		interval			= 0;
141
static unsigned int		initial_delay			= 0;
142
static unsigned int		unit_width			= 4; /* strlen("unit") */
143
static bool			forever				= false;
144
static struct timespec		ref_time;
145 146
static struct cpu_map		*aggr_map;
static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
147

148 149
static volatile int done = 0;

150 151 152 153
/*
 * Per-event private statistics, stored in evsel->priv.
 * read_counter_aggr() feeds res_stats[i] from entry i of the event's
 * aggregated values[] array each time the counter is read.
 */
struct perf_stat {
	struct stats	  res_stats[3];
};

154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
/*
 * Store a - b in *r, borrowing one second when the nanosecond
 * difference would otherwise be negative.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec  = a->tv_sec - b->tv_sec;
	r->tv_nsec = a->tv_nsec - b->tv_nsec;

	if (r->tv_nsec < 0) {
		r->tv_nsec += 1000000000L;
		r->tv_sec--;
	}
}

/*
 * Pick the CPU map to use for an event: its own map, unless it has none
 * or the target names a CPU list, in which case the evlist's map is used.
 */
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
	if (target.cpu_list || !evsel->cpus)
		return evsel_list->cpus;

	return evsel->cpus;
}

static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
	return perf_evsel__cpus(evsel)->nr;
}

176 177
/* Re-initialise the three result accumulators stored in evsel->priv. */
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	int i;
	struct perf_stat *ps = evsel->priv;

	for (i = 0; i < 3; i++)
		init_stats(&ps->res_stats[i]);
}

185
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
186
{
187
	evsel->priv = zalloc(sizeof(struct perf_stat));
188
	if (evsel->priv == NULL)
189 190 191
		return -ENOMEM;
	perf_evsel__reset_stat_priv(evsel);
	return 0;
192 193 194 195
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
196
	zfree(&evsel->priv);
197 198
}

199
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
Y
Yan, Zheng 已提交
200
{
201 202 203 204 205 206 207 208 209 210 211 212 213
	void *addr;
	size_t sz;

	sz = sizeof(*evsel->counts) +
	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));

	addr = zalloc(sz);
	if (!addr)
		return -ENOMEM;

	evsel->prev_raw_counts =  addr;

	return 0;
Y
Yan, Zheng 已提交
214 215
}

216
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
Y
Yan, Zheng 已提交
217
{
218
	zfree(&evsel->prev_raw_counts);
Y
Yan, Zheng 已提交
219 220
}

221 222 223 224
static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

225
	evlist__for_each(evlist, evsel) {
226 227 228 229 230 231 232 233 234 235
		perf_evsel__free_stat_priv(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_prev_raw_counts(evsel);
	}
}

static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *evsel;

236
	evlist__for_each(evlist, evsel) {
237 238 239 240 241 242 243 244 245 246 247 248 249
		if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
		    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
		    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}

250 251 252 253 254 255 256 257 258 259 260
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
261
static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
262
static struct stats walltime_nsecs_stats;
263 264
static struct stats runtime_transaction_stats[MAX_NR_CPUS];
static struct stats runtime_elision_stats[MAX_NR_CPUS];
265

266
static void perf_stat__reset_stats(struct perf_evlist *evlist)
267
{
268 269
	struct perf_evsel *evsel;

270
	evlist__for_each(evlist, evsel) {
271 272 273 274
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
	}

275 276 277 278 279 280 281 282 283 284 285
	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
286 287 288 289 290
	memset(runtime_cycles_in_tx_stats, 0,
			sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
		sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
291 292 293
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

294
static int create_perf_stat_counter(struct perf_evsel *evsel)
295
{
296
	struct perf_event_attr *attr = &evsel->attr;
297

298
	if (scale)
299 300
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
301

302 303
	attr->inherit = !no_inherit;

304
	if (target__has_cpu(&target))
305
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
306

307
	if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
308
		attr->disabled = 1;
309 310
		if (!initial_delay)
			attr->enable_on_exec = 1;
311
	}
312

313
	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
314 315
}

316 317 318
/*
 * Does the counter have nsecs as a unit?
 */
319
static inline int nsec_counter(struct perf_evsel *evsel)
320
{
321 322
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
323 324 325 326 327
		return 1;

	return 0;
}

328 329 330 331 332 333 334 335 336
static struct perf_evsel *nth_evsel(int n)
{
	static struct perf_evsel **array;
	static int array_len;
	struct perf_evsel *ev;
	int j;

	/* Assumes this only called when evsel_list does not change anymore. */
	if (!array) {
337
		evlist__for_each(evsel_list, ev)
338 339 340 341 342
			array_len++;
		array = malloc(array_len * sizeof(void *));
		if (!array)
			exit(ENOMEM);
		j = 0;
343
		evlist__for_each(evsel_list, ev)
344 345 346 347 348 349 350
			array[j++] = ev;
	}
	if (n < array_len)
		return array[n];
	return NULL;
}

I
Ingo Molnar 已提交
351 352 353 354 355 356 357 358 359 360 361
/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 *
 * The first matching branch wins; count[0] is the value recorded.
 *
 * NOTE(review): every sample is recorded in slot 0, although the arrays
 * are sized MAX_NR_CPUS and the printing helpers index them by cpu.
 * Confirm that the per-CPU modes are meant to share slot 0.
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (transaction_run &&
		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
		update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
	else if (transaction_run &&
		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
		update_stats(&runtime_transaction_stats[0], count[0]);
	else if (transaction_run &&
		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
		update_stats(&runtime_elision_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}

391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
/* Clear the per-package mask of a counter, if one has been allocated. */
static void zero_per_pkg(struct perf_evsel *counter)
{
	if (!counter->per_pkg_mask)
		return;

	memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}

static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
{
	unsigned long *mask = counter->per_pkg_mask;
	struct cpu_map *cpus = perf_evsel__cpus(counter);
	int s;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	if (cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		mask = zalloc(MAX_NR_CPUS);
		if (!mask)
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	s = cpu_map__get_socket(cpus, cpu);
	if (s < 0)
		return -1;

	*skip = test_and_set_bit(s, mask) == 1;
	return 0;
}

427 428 429
/*
 * Callback handed to perf_evsel__read_cb() for every cpu/thread reading.
 *
 * A reading that check_per_pkg() marks as skipped is replaced by an
 * all-zero value. In the per-CPU, per-socket and per-core modes the
 * value is turned into a delta (unless the event is a snapshot), scaled,
 * stored in the per-CPU slot and fed to the shadow statistics. In global
 * mode it is added to the event's aggregate.
 *
 * Returns 0 on success, -1 if the per-package check fails.
 */
static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
		   struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, cpu, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	if (skip)
		count = &zero;

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
	case AGGR_NONE:
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, count);
		perf_counts_values__scale(count, scale, NULL);
		evsel->counts->cpu[cpu] = *count;
		update_shadow_stats(evsel, count->values);
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		/* Enabled/running times are only summed when scaling. */
		if (scale) {
			aggr->ena += count->ena;
			aggr->run += count->run;
		}
		break;
	default:
		break;
	}

	return 0;
}

465 466
static int read_counter(struct perf_evsel *counter);

467
/*
468
 * Read out the results of a single counter:
469
 * aggregate counts across CPUs in system-wide mode
470
 */
471
static int read_counter_aggr(struct perf_evsel *counter)
472
{
473
	struct perf_counts_values *aggr = &counter->counts->aggr;
474
	struct perf_stat *ps = counter->priv;
475 476
	u64 *count = counter->counts->aggr.values;
	int i;
477

478 479 480
	aggr->val = aggr->ena = aggr->run = 0;

	if (read_counter(counter))
481
		return -1;
482

483 484
	if (!counter->snapshot)
		perf_evsel__compute_deltas(counter, -1, aggr);
485 486
	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);

487
	for (i = 0; i < 3; i++)
488
		update_stats(&ps->res_stats[i], count[i]);
489 490

	if (verbose) {
491
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
492
			perf_evsel__name(counter), count[0], count[1], count[2]);
493 494
	}

495 496 497
	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
I
Ingo Molnar 已提交
498
	update_shadow_stats(counter, count);
499 500

	return 0;
501 502 503 504 505 506
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
507
static int read_counter(struct perf_evsel *counter)
508
{
509 510 511
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;
512

513 514 515
	if (!counter->supported)
		return -ENOENT;

516 517 518
	if (counter->system_wide)
		nthreads = 1;

519 520 521
	if (counter->per_pkg)
		zero_per_pkg(counter);

522 523 524 525 526
	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
				return -1;
		}
527
	}
528 529

	return 0;
530 531
}

532 533 534 535 536 537 538 539
static void print_interval(void)
{
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

540
	if (aggr_mode == AGGR_GLOBAL) {
541
		evlist__for_each(evsel_list, counter) {
542 543
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
544
			read_counter_aggr(counter);
545
		}
546
	} else	{
547
		evlist__for_each(evsel_list, counter) {
548 549
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
550
			read_counter(counter);
551 552
		}
	}
553

554 555 556 557 558
	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
559 560
		switch (aggr_mode) {
		case AGGR_SOCKET:
561
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
562
			break;
563
		case AGGR_CORE:
564
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
565
			break;
566
		case AGGR_NONE:
567
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
568 569 570
			break;
		case AGGR_GLOBAL:
		default:
571
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
572
		}
573 574 575 576 577
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;

578
	switch (aggr_mode) {
579
	case AGGR_CORE:
580 581 582 583
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_NONE:
584
		evlist__for_each(evsel_list, counter)
585
			print_counter(counter, prefix);
586 587 588
		break;
	case AGGR_GLOBAL:
	default:
589
		evlist__for_each(evsel_list, counter)
590 591
			print_counter_aggr(counter, prefix);
	}
592 593

	fflush(output);
594 595
}

596 597 598 599 600 601 602 603 604
static void handle_initial_delay(void)
{
	struct perf_evsel *counter;

	if (initial_delay) {
		const int ncpus = cpu_map__nr(evsel_list->cpus),
			nthreads = thread_map__nr(evsel_list->threads);

		usleep(initial_delay * 1000);
605
		evlist__for_each(evsel_list, counter)
606 607 608 609
			perf_evsel__enable(counter, ncpus, nthreads);
	}
}

610
static volatile int workload_exec_errno;
611 612 613 614 615 616

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
617 618
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
619
{
620
	workload_exec_errno = info->si_value.sival_int;
621 622
}

623
static int __run_perf_stat(int argc, const char **argv)
624
{
625
	char msg[512];
626
	unsigned long long t0, t1;
627
	struct perf_evsel *counter;
628
	struct timespec ts;
629
	size_t l;
630
	int status = 0;
631
	const bool forks = (argc > 0);
632

633 634 635 636 637 638 639 640
	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

641
	if (forks) {
642 643
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
						  workload_exec_failed_signal) < 0) {
644 645
			perror("failed to prepare workload");
			return -1;
646
		}
647
		child_pid = evsel_list->workload.pid;
648 649
	}

650
	if (group)
651
		perf_evlist__set_leader(evsel_list);
652

653
	evlist__for_each(evsel_list, counter) {
654
		if (create_perf_stat_counter(counter) < 0) {
655 656 657 658
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
659
			if (errno == EINVAL || errno == ENOSYS ||
660 661
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
662 663
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
664
						    perf_evsel__name(counter));
665
				counter->supported = false;
666
				continue;
667
			}
668

669 670 671 672
			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

673 674
			if (child_pid != -1)
				kill(child_pid, SIGTERM);
675

676 677
			return -1;
		}
678
		counter->supported = true;
679 680 681 682

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;
683
	}
684

685
	if (perf_evlist__apply_filters(evsel_list)) {
686
		error("failed to set filter with %d (%s)\n", errno,
687
			strerror_r(errno, msg, sizeof(msg)));
688 689 690
		return -1;
	}

691 692 693 694
	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
695
	clock_gettime(CLOCK_MONOTONIC, &ref_time);
696

697
	if (forks) {
698
		perf_evlist__start_workload(evsel_list);
699
		handle_initial_delay();
700

701 702 703 704 705 706
		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
707
		wait(&status);
708

709 710 711
		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
712
			return -1;
713
		}
714

715 716
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
717
	} else {
718
		handle_initial_delay();
719 720 721 722 723
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
724
	}
725 726 727

	t1 = rdclock();

728
	update_stats(&walltime_nsecs_stats, t1 - t0);
729

730
	if (aggr_mode == AGGR_GLOBAL) {
731
		evlist__for_each(evsel_list, counter) {
732
			read_counter_aggr(counter);
Y
Yan, Zheng 已提交
733
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
734
					     thread_map__nr(evsel_list->threads));
735
		}
736
	} else {
737
		evlist__for_each(evsel_list, counter) {
738 739 740
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
741
	}
742

743 744 745
	return WEXITSTATUS(status);
}

746
static int run_perf_stat(int argc, const char **argv)
747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

772 773 774 775 776 777 778 779 780 781 782 783 784
/*
 * Print how long the counter was running relative to the time it was
 * enabled.
 *
 * CSV mode always prints the run time and the percentage, using 100.00
 * when @ena is zero. Otherwise a "(xx.xx%)" annotation is printed only
 * when @run and @ena differ.
 */
static void print_running(u64 run, u64 ena)
{
	double pct;

	if (!csv_output) {
		if (run != ena)
			fprintf(output, "  (%.2f%%)", 100.0 * run / ena);
		return;
	}

	pct = ena ? 100.0 * run / ena : 100.0;
	fprintf(output, "%s%" PRIu64 "%s%.2f", csv_sep, run, csv_sep, pct);
}

785 786
static void print_noise_pct(double total, double avg)
{
787
	double pct = rel_stddev_stats(total, avg);
788

789
	if (csv_output)
790
		fprintf(output, "%s%.2f%%", csv_sep, pct);
791
	else if (pct)
792
		fprintf(output, "  ( +-%6.2f%% )", pct);
793 794
}

795
static void print_noise(struct perf_evsel *evsel, double avg)
796
{
797 798
	struct perf_stat *ps;

799 800 801
	if (run_count == 1)
		return;

802
	ps = evsel->priv;
803
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
804 805
}

806
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
I
Ingo Molnar 已提交
807
{
808
	switch (aggr_mode) {
809 810 811 812 813 814 815 816 817 818
	case AGGR_CORE:
		fprintf(output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
819 820
	case AGGR_SOCKET:
		fprintf(output, "S%*d%s%*d%s",
821
			csv_output ? 0 : -5,
822
			id,
823 824 825 826
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
827 828 829
			break;
	case AGGR_NONE:
		fprintf(output, "CPU%*d%s",
S
Stephane Eranian 已提交
830
			csv_output ? 0 : -4,
831
			perf_evsel__cpus(evsel)->map[id], csv_sep);
832 833 834 835 836 837 838
		break;
	case AGGR_GLOBAL:
	default:
		break;
	}
}

839
static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
840 841
{
	double msecs = avg / 1e6;
842
	const char *fmt_v, *fmt_n;
843
	char name[25];
844

845 846 847
	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

848
	aggr_printout(evsel, id, nr);
S
Stephane Eranian 已提交
849

850 851
	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
852 853 854 855 856 857 858 859 860

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);
S
Stephane Eranian 已提交
861

S
Stephane Eranian 已提交
862
	if (evsel->cgrp)
863
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
S
Stephane Eranian 已提交
864

865
	if (csv_output || interval)
S
Stephane Eranian 已提交
866
		return;
I
Ingo Molnar 已提交
867

868
	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
869 870
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
871 872
	else
		fprintf(output, "                                   ");
I
Ingo Molnar 已提交
873 874
}

875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901
/*
 * Row selector for the threshold table in get_ratio_color().
 * GRC_MAX_NR is the number of rows, not a valid selector.
 */
enum grc_type {
	GRC_STALLED_CYCLES_FE = 0,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

/*
 * Map a percentage to a display colour using the thresholds for @type.
 *
 * Thresholds are listed from most to least severe. The first one that
 * @ratio exceeds selects red, magenta or yellow respectively. If none is
 * exceeded the normal colour is returned.
 */
static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
	};
	static const char * const level_color[3] = {
		PERF_COLOR_RED,
		PERF_COLOR_MAGENTA,
		PERF_COLOR_YELLOW,
	};
	int level;

	for (level = 0; level < 3; level++) {
		if (ratio > grc_table[type][level])
			return level_color[level];
	}

	return PERF_COLOR_NORMAL;
}

902 903 904
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
905 906 907 908 909 910 911 912 913
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

914
	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
915

916 917 918
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
919 920
}

921 922 923
static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
924 925 926 927 928 929 930 931 932
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

933
	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
934

935 936 937
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
938 939
}

940 941 942
static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
943 944 945 946 947 948 949 950 951
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

952
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
953

954 955 956
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
957 958
}

959 960 961
static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
962 963 964 965 966 967 968 969 970
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

971
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
972

973 974 975
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
976 977
}

978 979 980
static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
981 982 983 984 985 986 987 988 989
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

990
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
991

992 993 994
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
995 996
}

997 998 999
static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
1000 1001 1002 1003 1004 1005 1006 1007 1008
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

1009
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1010

1011 1012 1013
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
1014 1015
}

1016 1017 1018
static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
1019 1020 1021 1022 1023 1024 1025 1026 1027
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

1028
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1029

1030 1031 1032
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
1033 1034
}

1035 1036 1037
static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
1038 1039 1040 1041 1042 1043 1044 1045 1046
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

1047
	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
1048

1049 1050 1051
	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
1052 1053
}

1054
static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
I
Ingo Molnar 已提交
1055
{
1056
	double total, ratio = 0.0, total2;
1057
	double sc =  evsel->scale;
S
Stephane Eranian 已提交
1058
	const char *fmt;
1059
	int cpu = cpu_map__id_to_cpu(id);
S
Stephane Eranian 已提交
1060

1061 1062 1063 1064 1065 1066 1067 1068
	if (csv_output) {
		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}
1069

1070
	aggr_printout(evsel, id, nr);
1071 1072

	if (aggr_mode == AGGR_GLOBAL)
1073
		cpu = 0;
1074

1075 1076 1077 1078 1079 1080 1081 1082
	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
S
Stephane Eranian 已提交
1083

S
Stephane Eranian 已提交
1084
	if (evsel->cgrp)
1085
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
S
Stephane Eranian 已提交
1086

1087
	if (csv_output || interval)
S
Stephane Eranian 已提交
1088
		return;
I
Ingo Molnar 已提交
1089

1090
	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
1091
		total = avg_stats(&runtime_cycles_stats[cpu]);
1092
		if (total) {
1093
			ratio = avg / total;
1094 1095
			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
		}
1096 1097
		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
1098 1099 1100

		if (total && avg) {
			ratio = total / avg;
1101 1102 1103 1104
			fprintf(output, "\n");
			if (aggr_mode == AGGR_NONE)
				fprintf(output, "        ");
			fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
1105 1106
		}

1107
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
1108
			runtime_branches_stats[cpu].n != 0) {
1109
		print_branch_misses(cpu, evsel, avg);
1110 1111 1112 1113 1114
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
1115
			runtime_l1_dcache_stats[cpu].n != 0) {
1116
		print_l1_dcache_misses(cpu, evsel, avg);
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
1145 1146 1147 1148 1149 1150 1151
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

1152
		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
1153

1154 1155
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
1156
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
1157
		print_stalled_cycles_backend(cpu, evsel, avg);
1158
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
1159
		total = avg_stats(&runtime_nsecs_stats[cpu]);
1160

1161 1162 1163 1164
		if (total) {
			ratio = avg / total;
			fprintf(output, " # %8.3f GHz                    ", ratio);
		}
1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
		total = avg_stats(&runtime_cycles_stats[cpu]);
		if (total)
			fprintf(output,
				" #   %5.2f%% transactional cycles   ",
				100.0 * (avg / total));
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
		total = avg_stats(&runtime_cycles_stats[cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
			fprintf(output,
				" #   %5.2f%% aborted cycles         ",
				100.0 * ((total2-avg) / total));
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
		   avg > 0 &&
		   runtime_cycles_in_tx_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);

		if (total)
			ratio = total / avg;

		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
	} else if (transaction_run &&
		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
		   avg > 0 &&
		   runtime_cycles_in_tx_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);

		if (total)
			ratio = total / avg;

		fprintf(output, " # %8.0f cycles / elision       ", ratio);
1202
	} else if (runtime_nsecs_stats[cpu].n != 0) {
N
Namhyung Kim 已提交
1203 1204
		char unit = 'M';

1205
		total = avg_stats(&runtime_nsecs_stats[cpu]);
1206 1207

		if (total)
1208
			ratio = 1000.0 * avg / total;
N
Namhyung Kim 已提交
1209 1210 1211 1212
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
1213

N
Namhyung Kim 已提交
1214
		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
1215
	} else {
1216
		fprintf(output, "                                   ");
I
Ingo Molnar 已提交
1217 1218 1219
	}
}

1220
static void print_aggr(char *prefix)
1221 1222
{
	struct perf_evsel *counter;
1223
	int cpu, cpu2, s, s2, id, nr;
1224
	double uval;
1225 1226
	u64 ena, run, val;

1227
	if (!(aggr_map || aggr_get_id))
1228 1229
		return;

1230 1231
	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
1232
		evlist__for_each(evsel_list, counter) {
1233 1234 1235
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1236 1237
				cpu2 = perf_evsel__cpus(counter)->map[cpu];
				s2 = aggr_get_id(evsel_list->cpus, cpu2);
1238
				if (s2 != id)
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
1249
				aggr_printout(counter, id, nr);
1250

1251
				fprintf(output, "%*s%s",
1252 1253
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1254 1255 1256 1257 1258 1259 1260 1261
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
1262
					perf_evsel__name(counter));
1263

1264 1265 1266 1267
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

1268
				print_running(run, ena);
1269 1270 1271
				fputc('\n', output);
				continue;
			}
1272
			uval = val * counter->scale;
1273 1274

			if (nsec_counter(counter))
1275
				nsec_printout(id, nr, counter, uval);
1276
			else
1277
				abs_printout(id, nr, counter, uval);
1278

1279
			if (!csv_output)
1280 1281
				print_noise(counter, 1.0);

1282
			print_running(run, ena);
1283 1284 1285 1286 1287
			fputc('\n', output);
		}
	}
}

1288 1289
/*
 * Print out the results of a single counter:
1290
 * aggregated counts in system-wide mode
1291
 */
1292
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1293
{
1294 1295
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
1296
	int scaled = counter->counts->scaled;
1297
	double uval;
1298 1299 1300 1301
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);
1302

1303 1304 1305
	if (prefix)
		fprintf(output, "%s", prefix);

1306
	if (scaled == -1 || !counter->supported) {
1307
		fprintf(output, "%*s%s",
S
Stephane Eranian 已提交
1308
			csv_output ? 0 : 18,
1309
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1310 1311 1312 1313 1314 1315
			csv_sep);
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);
		fprintf(output, "%*s",
			csv_output ? 0 : -25,
1316
			perf_evsel__name(counter));
S
Stephane Eranian 已提交
1317 1318

		if (counter->cgrp)
1319
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
1320

1321
		print_running(avg_running, avg_enabled);
1322
		fputc('\n', output);
1323 1324
		return;
	}
1325

1326 1327
	uval = avg * counter->scale;

I
Ingo Molnar 已提交
1328
	if (nsec_counter(counter))
1329
		nsec_printout(-1, 0, counter, uval);
I
Ingo Molnar 已提交
1330
	else
1331
		abs_printout(-1, 0, counter, uval);
1332

1333 1334
	print_noise(counter, avg);

1335
	print_running(avg_running, avg_enabled);
1336
	fprintf(output, "\n");
1337 1338
}

1339 1340 1341 1342
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
1343
static void print_counter(struct perf_evsel *counter, char *prefix)
1344 1345
{
	u64 ena, run, val;
1346
	double uval;
1347 1348
	int cpu;

Y
Yan, Zheng 已提交
1349
	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1350 1351 1352
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;
1353 1354 1355 1356

		if (prefix)
			fprintf(output, "%s", prefix);

1357
		if (run == 0 || ena == 0) {
1358
			fprintf(output, "CPU%*d%s%*s%s",
S
Stephane Eranian 已提交
1359
				csv_output ? 0 : -4,
Y
Yan, Zheng 已提交
1360
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
S
Stephane Eranian 已提交
1361
				csv_output ? 0 : 18,
1362
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1363 1364 1365 1366 1367 1368 1369 1370 1371
				csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));
1372

S
Stephane Eranian 已提交
1373
			if (counter->cgrp)
1374 1375
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);
S
Stephane Eranian 已提交
1376

1377
			print_running(run, ena);
1378
			fputc('\n', output);
1379 1380 1381
			continue;
		}

1382 1383
		uval = val * counter->scale;

1384
		if (nsec_counter(counter))
1385
			nsec_printout(cpu, 0, counter, uval);
1386
		else
1387
			abs_printout(cpu, 0, counter, uval);
1388

1389
		if (!csv_output)
S
Stephane Eranian 已提交
1390
			print_noise(counter, 1.0);
1391
		print_running(run, ena);
1392

1393
		fputc('\n', output);
1394 1395 1396
	}
}

1397 1398
static void print_stat(int argc, const char **argv)
{
1399 1400
	struct perf_evsel *counter;
	int i;
1401

1402 1403
	fflush(stdout);

S
Stephane Eranian 已提交
1404
	if (!csv_output) {
1405 1406
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
1407 1408 1409 1410
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
1411
		else if (!target__has_task(&target)) {
1412
			fprintf(output, "\'%s", argv[0]);
S
Stephane Eranian 已提交
1413
			for (i = 1; i < argc; i++)
1414
				fprintf(output, " %s", argv[i]);
1415 1416
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
S
Stephane Eranian 已提交
1417
		else
1418
			fprintf(output, "thread id \'%s", target.tid);
I
Ingo Molnar 已提交
1419

1420
		fprintf(output, "\'");
S
Stephane Eranian 已提交
1421
		if (run_count > 1)
1422 1423
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
S
Stephane Eranian 已提交
1424
	}
1425

1426
	switch (aggr_mode) {
1427
	case AGGR_CORE:
1428 1429 1430 1431
	case AGGR_SOCKET:
		print_aggr(NULL);
		break;
	case AGGR_GLOBAL:
1432
		evlist__for_each(evsel_list, counter)
1433
			print_counter_aggr(counter, NULL);
1434 1435
		break;
	case AGGR_NONE:
1436
		evlist__for_each(evsel_list, counter)
1437 1438 1439 1440
			print_counter(counter, NULL);
		break;
	default:
		break;
1441
	}
1442

S
Stephane Eranian 已提交
1443
	if (!csv_output) {
1444
		if (!null_run)
1445 1446
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
S
Stephane Eranian 已提交
1447 1448
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
1449
			fprintf(output, "                                        ");
1450 1451
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
S
Stephane Eranian 已提交
1452
		}
1453
		fprintf(output, "\n\n");
I
Ingo Molnar 已提交
1454
	}
1455 1456
}

1457 1458
static volatile int signr = -1;

1459
static void skip_signal(int signo)
1460
{
1461
	if ((child_pid == -1) || interval)
1462 1463
		done = 1;

1464
	signr = signo;
1465 1466 1467 1468 1469 1470 1471
	/*
	 * render child_pid harmless
	 * won't send SIGTERM to a random
	 * process in case of race condition
	 * and fast PID recycling
	 */
	child_pid = -1;
1472 1473 1474 1475
}

static void sig_atexit(void)
{
1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487
	sigset_t set, oset;

	/*
	 * avoid race condition with SIGCHLD handler
	 * in skip_signal() which is modifying child_pid
	 * goal is to avoid send SIGTERM to a random
	 * process
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

1488 1489 1490
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

1491 1492
	sigprocmask(SIG_SETMASK, &oset, NULL);

1493 1494 1495 1496 1497
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
1498 1499
}

1500 1501
/*
 * Option callback for -B/--big-num: store 1 in big_num_opt when the
 * option is given and 0 when it is negated (@unset non-zero).
 */
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = !unset;
	return 0;
}

1507 1508 1509 1510 1511 1512 1513 1514 1515 1516
static int perf_stat_init_aggr_mode(void)
{
	switch (aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = cpu_map__get_socket;
		break;
1517 1518 1519 1520 1521 1522 1523
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = cpu_map__get_core;
		break;
1524 1525 1526 1527 1528 1529 1530 1531
	case AGGR_NONE:
	case AGGR_GLOBAL:
	default:
		break;
	}
	return 0;
}

1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
/*
 * Parse the first @len event strings of @attrs into evsel_list.
 *
 * Returns 0 on success, -1 as soon as one string fails to parse.
 */
static int setup_events(const char * const *attrs, unsigned len)
{
	while (len-- > 0) {
		if (parse_events(evsel_list, *attrs++))
			return -1;
	}

	return 0;
}
1542

1543 1544 1545 1546 1547 1548
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655
	struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},

};

/*
 * Detailed stats (-d), covering the L1 and last level data caches:
 */
	struct perf_event_attr detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_LL			<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

/*
 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
 */
	struct perf_event_attr very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},

};

/*
 * Very, very detailed stats (-d -d -d), adding prefetch events:
 */
	struct perf_event_attr very_very_detailed_attrs[] = {

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},

  { .type = PERF_TYPE_HW_CACHE,
    .config =
	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
};

1656 1657 1658 1659
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675
	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = setup_events(transaction_attrs,
					ARRAY_SIZE(transaction_attrs));
		else
			err = setup_events(transaction_limited_attrs,
				 ARRAY_SIZE(transaction_limited_attrs));
		if (err < 0) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

1676
	if (!evsel_list->nr_entries) {
1677
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1678
			return -1;
1679 1680 1681 1682 1683 1684 1685 1686
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run <  1)
		return 0;

	/* Append detailed run extra attributes: */
1687
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1688
		return -1;
1689 1690 1691 1692 1693

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
1694
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1695
		return -1;
1696 1697 1698 1699 1700

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
1701
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1702 1703
}

1704
/*
 * Entry point of 'perf stat': parse the options, pick the output
 * stream, set up the event list, run the workload run_count times (or
 * forever) and print the collected statistics.
 *
 * Returns -ENOMEM when the event list cannot be allocated, a negative
 * value when option validation or setup fails, and otherwise the status
 * of the last run_perf_stat() call.
 */
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	bool append_file = false;
	int output_fd = 0;
	const char *output_name	= NULL;
	const struct option options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		    "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
		    "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
			"command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &interval,
		    "print counts at regular interval in ms (>= 100)"),
	OPT_SET_UINT(0, "per-socket", &aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	/* Default to stderr; NULL marks "open output_name below". */
	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, options, "o", 1);
		parse_options_usage(NULL, options, "log-fd", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be a > 0\n");
		parse_options_usage(stat_usage, options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			/* Leave through 'out' so evsel_list is released. */
			status = -1;
			goto out;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			/* Save errno first: perror() may modify it. */
			status = -errno;
			perror("Failed opening logfd");
			goto out;
		}
	}

	if (csv_sep) {
		csv_output = true;
		/* Accept a literal "\t" as the tab character. */
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, options, "B", 1);
			parse_options_usage(NULL, options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && target__none(&target))
		usage_with_options(stat_usage, options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		/* -r 0 means: repeat until interrupted. */
		forever = true;
		run_count = 1;
	}

	/* no_aggr, cgroup are for system-wide only */
	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, options, "G", 1);
		parse_options_usage(NULL, options, "A", 1);
		parse_options_usage(NULL, options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, options, "p", 1);
			parse_options_usage(NULL, options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, options, "C", 1);
			parse_options_usage(NULL, options, "a", 1);
		}
		goto out;
	}
	if (interval && interval < 100) {
		pr_err("print interval must be >= 100ms\n");
		parse_options_usage(stat_usage, options, "I", 1);
		goto out;
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT,  skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		/* In forever mode, report and reset after every run. */
		if (forever && status != -1) {
			print_stat(argc, argv);
			perf_stat__reset_stats(evsel_list);
		}
	}

	if (!forever && status != -1 && !interval)
		print_stat(argc, argv);

	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}