/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ~/hackbench 10
   Time: 0.104

    Performance counter stats for '/home/mingo/hackbench':

       1255.538611  task clock ticks     #      10.143 CPU utilization factor
             54011  context switches     #       0.043 M/sec
               385  CPU migrations       #       0.000 M/sec
             17755  pagefaults           #       0.014 M/sec
        3808323185  CPU cycles           #    3033.219 M/sec
        1575111190  instructions         #    1254.530 M/sec
          17367895  cache references     #      13.833 M/sec
           7674421  cache misses         #       6.112 M/sec

    Wall-clock time elapsed:   123.786620 msecs

 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
#include "util/debug.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"

#include <sys/prctl.h>
#include <math.h>
#include <locale.h>

static struct perf_event_attr default_attrs[] = {
56

57 58 59 60
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
61

62 63 64 65
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
I
Ingo Molnar 已提交
66 67
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES	},
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES		},
68

69
};
70

71
static bool			system_wide			=  false;
72
static int			nr_cpus				=  0;
73
static int			run_idx				=  0;
74

75
static int			run_count			=  1;
76
static bool			no_inherit			= false;
77
static bool			scale				=  true;
78
static bool			no_aggr				= false;
79
static pid_t			target_pid			= -1;
80 81 82
static pid_t			target_tid			= -1;
static pid_t			*all_tids			=  NULL;
static int			thread_num			=  0;
83
static pid_t			child_pid			= -1;
84
static bool			null_run			=  false;
85
static bool			big_num				=  false;
86
static const char		*cpu_list;
87

88

89
static int			*fd[MAX_NR_CPUS][MAX_COUNTERS];
90

91
static int			event_scaled[MAX_COUNTERS];
92

93 94 95 96 97 98
static struct {
	u64 val;
	u64 ena;
	u64 run;
} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];

99 100
static volatile int done = 0;

101 102
struct stats
{
103
	double n, mean, M2;
104
};
105

106 107
static void update_stats(struct stats *stats, u64 val)
{
108
	double delta;
109

110 111 112 113
	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta*(val - stats->mean);
114 115
}

116 117
static double avg_stats(struct stats *stats)
{
118
	return stats->mean;
119
}
120

121
/*
122 123
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
124 125 126
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
127 128 129 130 131 132 133 134 135
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
136 137 138
 */
static double stddev_stats(struct stats *stats)
{
139 140
	double variance = stats->M2 / (stats->n - 1);
	double variance_mean = variance / stats->n;
141

142
	return sqrt(variance_mean);
143
}
144

145
struct stats			event_res_stats[MAX_COUNTERS][3];
146 147 148
struct stats			runtime_nsecs_stats[MAX_NR_CPUS];
struct stats			runtime_cycles_stats[MAX_NR_CPUS];
struct stats			runtime_branches_stats[MAX_NR_CPUS];
149
struct stats			walltime_nsecs_stats;
150

151 152 153 154
#define MATCH_EVENT(t, c, counter)			\
	(attrs[counter].type == PERF_TYPE_##t &&	\
	 attrs[counter].config == PERF_COUNT_##c)

155
#define ERR_PERF_OPEN \
156
"counter %d, sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information."
157

158
static int create_perf_stat_counter(int counter, bool *perm_err)
159
{
160
	struct perf_event_attr *attr = attrs + counter;
161
	int thread;
162
	int ncreated = 0;
163

164
	if (scale)
165 166
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
167 168

	if (system_wide) {
169
		int cpu;
170

171
		for (cpu = 0; cpu < nr_cpus; cpu++) {
172 173
			fd[cpu][counter][0] = sys_perf_event_open(attr,
					-1, cpumap[cpu], -1, 0);
174 175 176 177
			if (fd[cpu][counter][0] < 0) {
				if (errno == EPERM || errno == EACCES)
					*perm_err = true;
				error(ERR_PERF_OPEN, counter,
178
					 fd[cpu][counter][0], strerror(errno));
179
			} else {
180
				++ncreated;
181
			}
182 183
		}
	} else {
184 185
		attr->inherit = !no_inherit;
		if (target_pid == -1 && target_tid == -1) {
186 187 188
			attr->disabled = 1;
			attr->enable_on_exec = 1;
		}
189 190 191
		for (thread = 0; thread < thread_num; thread++) {
			fd[0][counter][thread] = sys_perf_event_open(attr,
				all_tids[thread], -1, -1, 0);
192 193 194 195
			if (fd[0][counter][thread] < 0) {
				if (errno == EPERM || errno == EACCES)
					*perm_err = true;
				error(ERR_PERF_OPEN, counter,
196 197
					 fd[0][counter][thread],
					 strerror(errno));
198
			} else {
199
				++ncreated;
200
			}
201
		}
202
	}
203 204

	return ncreated;
205 206
}

207 208 209 210 211
/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(int counter)
{
212 213
	if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
	    MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
214 215 216 217 218 219
		return 1;

	return 0;
}

/*
220
 * Read out the results of a single counter:
221
 * aggregate counts across CPUs in system-wide mode
222
 */
223
static void read_counter_aggr(int counter)
224
{
225
	u64 count[3], single_count[3];
226
	int cpu;
227
	size_t res, nv;
228
	int scaled;
229
	int i, thread;
230 231

	count[0] = count[1] = count[2] = 0;
232

233
	nv = scale ? 3 : 1;
234
	for (cpu = 0; cpu < nr_cpus; cpu++) {
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
		for (thread = 0; thread < thread_num; thread++) {
			if (fd[cpu][counter][thread] < 0)
				continue;

			res = read(fd[cpu][counter][thread],
					single_count, nv * sizeof(u64));
			assert(res == nv * sizeof(u64));

			close(fd[cpu][counter][thread]);
			fd[cpu][counter][thread] = -1;

			count[0] += single_count[0];
			if (scale) {
				count[1] += single_count[1];
				count[2] += single_count[2];
			}
251 252 253 254 255 256
		}
	}

	scaled = 0;
	if (scale) {
		if (count[2] == 0) {
257
			event_scaled[counter] = -1;
258
			count[0] = 0;
259 260
			return;
		}
261

262
		if (count[2] < count[1]) {
263
			event_scaled[counter] = 1;
264 265 266 267
			count[0] = (unsigned long long)
				((double)count[0] * count[1] / count[2] + 0.5);
		}
	}
268 269 270 271 272 273 274 275 276

	for (i = 0; i < 3; i++)
		update_stats(&event_res_stats[counter][i], count[i]);

	if (verbose) {
		fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
				count[0], count[1], count[2]);
	}

277 278 279
	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
280
	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
281
		update_stats(&runtime_nsecs_stats[0], count[0]);
282
	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
283
		update_stats(&runtime_cycles_stats[0], count[0]);
284
	if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
		update_stats(&runtime_branches_stats[0], count[0]);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static void read_counter(int counter)
{
	u64 count[3];
	int cpu;
	size_t res, nv;

	count[0] = count[1] = count[2] = 0;

	nv = scale ? 3 : 1;

	for (cpu = 0; cpu < nr_cpus; cpu++) {

		if (fd[cpu][counter][0] < 0)
			continue;

		res = read(fd[cpu][counter][0], count, nv * sizeof(u64));

		assert(res == nv * sizeof(u64));

		close(fd[cpu][counter][0]);
		fd[cpu][counter][0] = -1;

		if (scale) {
			if (count[2] == 0) {
				count[0] = 0;
			} else if (count[2] < count[1]) {
				count[0] = (unsigned long long)
				((double)count[0] * count[1] / count[2] + 0.5);
			}
		}
		cpu_counts[cpu][counter].val = count[0]; /* scaled count */
		cpu_counts[cpu][counter].ena = count[1];
		cpu_counts[cpu][counter].run = count[2];

		if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
			update_stats(&runtime_nsecs_stats[cpu], count[0]);
		if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
			update_stats(&runtime_cycles_stats[cpu], count[0]);
		if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
			update_stats(&runtime_branches_stats[cpu], count[0]);
	}
333 334
}

335
static int run_perf_stat(int argc __used, const char **argv)
336 337 338
{
	unsigned long long t0, t1;
	int status = 0;
339
	int counter, ncreated = 0;
340
	int child_ready_pipe[2], go_pipe[2];
341
	bool perm_err = false;
342
	const bool forks = (argc > 0);
343
	char buf;
344 345 346 347

	if (!system_wide)
		nr_cpus = 1;

348
	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
349 350 351 352
		perror("failed to create pipes");
		exit(1);
	}

353
	if (forks) {
354
		if ((child_pid = fork()) < 0)
355 356
			perror("failed to fork");

357
		if (!child_pid) {
358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			exit(-1);
		}
385

386 387 388
		if (target_tid == -1 && target_pid == -1 && !system_wide)
			all_tids[0] = child_pid;

389
		/*
390
		 * Wait for the child to be ready to exec.
391 392
		 */
		close(child_ready_pipe[1]);
393 394
		close(go_pipe[0]);
		if (read(child_ready_pipe[0], &buf, 1) == -1)
395
			perror("unable to read pipe");
396
		close(child_ready_pipe[0]);
397 398
	}

399
	for (counter = 0; counter < nr_counters; counter++)
400 401 402 403 404 405 406 407 408
		ncreated += create_perf_stat_counter(counter, &perm_err);

	if (ncreated < nr_counters) {
		if (perm_err)
			error("You may not have permission to collect %sstats.\n"
			      "\t Consider tweaking"
			      " /proc/sys/kernel/perf_event_paranoid or running as root.",
			      system_wide ? "system-wide " : "");
		die("Not all events could be opened.\n");
409 410 411 412
		if (child_pid != -1)
			kill(child_pid, SIGTERM);
		return -1;
	}
413 414 415 416 417 418

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();

419 420 421 422
	if (forks) {
		close(go_pipe[1]);
		wait(&status);
	} else {
423
		while(!done) sleep(1);
424
	}
425 426 427

	t1 = rdclock();

428
	update_stats(&walltime_nsecs_stats, t1 - t0);
429

430 431 432 433 434 435 436
	if (no_aggr) {
		for (counter = 0; counter < nr_counters; counter++)
			read_counter(counter);
	} else {
		for (counter = 0; counter < nr_counters; counter++)
			read_counter_aggr(counter);
	}
437 438 439
	return WEXITSTATUS(status);
}

440
static void print_noise(int counter, double avg)
441
{
442 443 444 445 446
	if (run_count == 1)
		return;

	fprintf(stderr, "   ( +- %7.3f%% )",
			100 * stddev_stats(&event_res_stats[counter][0]) / avg);
447 448
}

449
static void nsec_printout(int cpu, int counter, double avg)
I
Ingo Molnar 已提交
450
{
451
	double msecs = avg / 1e6;
I
Ingo Molnar 已提交
452

453 454 455 456 457
	if (no_aggr)
		fprintf(stderr, "CPU%-4d %18.6f  %-24s",
			cpumap[cpu], msecs, event_name(counter));
	else
		fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
I
Ingo Molnar 已提交
458

459
	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
460 461
		fprintf(stderr, " # %10.3f CPUs ",
				avg / avg_stats(&walltime_nsecs_stats));
I
Ingo Molnar 已提交
462 463 464
	}
}

465
static void abs_printout(int cpu, int counter, double avg)
I
Ingo Molnar 已提交
466
{
467
	double total, ratio = 0.0;
468 469 470 471 472 473
	char cpustr[16] = { '\0', };

	if (no_aggr)
		sprintf(cpustr, "CPU%-4d", cpumap[cpu]);
	else
		cpu = 0;
474

475
	if (big_num)
476 477
		fprintf(stderr, "%s %'18.0f  %-24s",
			cpustr, avg, event_name(counter));
478
	else
479 480
		fprintf(stderr, "%s %18.0f  %-24s",
			cpustr, avg, event_name(counter));
I
Ingo Molnar 已提交
481

482
	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
483
		total = avg_stats(&runtime_cycles_stats[cpu]);
484 485 486 487 488

		if (total)
			ratio = avg / total;

		fprintf(stderr, " # %10.3f IPC  ", ratio);
489
	} else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
490 491
			runtime_branches_stats[cpu].n != 0) {
		total = avg_stats(&runtime_branches_stats[cpu]);
492 493 494 495

		if (total)
			ratio = avg * 100 / total;

I
Ingo Molnar 已提交
496
		fprintf(stderr, " # %10.3f %%    ", ratio);
497

498 499
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);
500 501 502 503 504

		if (total)
			ratio = 1000.0 * avg / total;

		fprintf(stderr, " # %10.3f M/sec", ratio);
I
Ingo Molnar 已提交
505 506 507
	}
}

508 509
/*
 * Print out the results of a single counter:
510
 * aggregated counts in system-wide mode
511
 */
512
static void print_counter_aggr(int counter)
513
{
514
	double avg = avg_stats(&event_res_stats[counter][0]);
515
	int scaled = event_scaled[counter];
516 517

	if (scaled == -1) {
518
		fprintf(stderr, " %18s  %-24s\n",
519 520 521
			"<not counted>", event_name(counter));
		return;
	}
522

I
Ingo Molnar 已提交
523
	if (nsec_counter(counter))
524
		nsec_printout(-1, counter, avg);
I
Ingo Molnar 已提交
525
	else
526
		abs_printout(-1, counter, avg);
527 528

	print_noise(counter, avg);
529 530 531 532 533 534

	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&event_res_stats[counter][1]);
		avg_running = avg_stats(&event_res_stats[counter][2]);
535

536
		fprintf(stderr, "  (scaled from %.2f%%)",
537 538
				100 * avg_running / avg_enabled);
	}
I
Ingo Molnar 已提交
539

540 541 542
	fprintf(stderr, "\n");
}

543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(int counter)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		val = cpu_counts[cpu][counter].val;
		ena = cpu_counts[cpu][counter].ena;
		run = cpu_counts[cpu][counter].run;
		if (run == 0 || ena == 0) {
			fprintf(stderr, "CPU%-4d %18s  %-24s", cpumap[cpu],
					"<not counted>", event_name(counter));

			fprintf(stderr, "\n");
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, counter, val);
		else
			abs_printout(cpu, counter, val);

		print_noise(counter, 1.0);

		if (run != ena) {
			fprintf(stderr, "  (scaled from %.2f%%)",
					100.0 * run / ena);
		}
		fprintf(stderr, "\n");
	}
}

579 580 581 582
static void print_stat(int argc, const char **argv)
{
	int i, counter;

583 584 585
	fflush(stdout);

	fprintf(stderr, "\n");
586
	fprintf(stderr, " Performance counter stats for ");
587
	if(target_pid == -1 && target_tid == -1) {
588 589 590
		fprintf(stderr, "\'%s", argv[0]);
		for (i = 1; i < argc; i++)
			fprintf(stderr, " %s", argv[i]);
591 592 593 594
	} else if (target_pid != -1)
		fprintf(stderr, "process id \'%d", target_pid);
	else
		fprintf(stderr, "thread id \'%d", target_tid);
I
Ingo Molnar 已提交
595

596 597 598 599
	fprintf(stderr, "\'");
	if (run_count > 1)
		fprintf(stderr, " (%d runs)", run_count);
	fprintf(stderr, ":\n\n");
600

601 602 603 604 605 606 607
	if (no_aggr) {
		for (counter = 0; counter < nr_counters; counter++)
			print_counter(counter);
	} else {
		for (counter = 0; counter < nr_counters; counter++)
			print_counter_aggr(counter);
	}
608 609

	fprintf(stderr, "\n");
610
	fprintf(stderr, " %18.9f  seconds time elapsed",
611
			avg_stats(&walltime_nsecs_stats)/1e9);
I
Ingo Molnar 已提交
612 613
	if (run_count > 1) {
		fprintf(stderr, "   ( +- %7.3f%% )",
614 615
				100*stddev_stats(&walltime_nsecs_stats) /
				avg_stats(&walltime_nsecs_stats));
I
Ingo Molnar 已提交
616 617
	}
	fprintf(stderr, "\n\n");
618 619
}

620 621
static volatile int signr = -1;

622
static void skip_signal(int signo)
623
{
624
	if(child_pid == -1)
625 626
		done = 1;

627 628 629 630 631
	signr = signo;
}

static void sig_atexit(void)
{
632 633 634
	if (child_pid != -1)
		kill(child_pid, SIGTERM);

635 636 637 638 639
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
640 641 642
}

static const char * const stat_usage[] = {
643
	"perf stat [<options>] [<command>]",
644 645 646 647 648
	NULL
};

static const struct option options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
649 650
		     "event selector. use 'perf list' to list available events",
		     parse_events),
651 652
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
653
	OPT_INTEGER('p', "pid", &target_pid,
654 655 656
		    "stat events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "stat events on existing thread id"),
657
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
658
		    "system-wide collection from all CPUs"),
659
	OPT_BOOLEAN('c', "scale", &scale,
660
		    "scale/normalize counters"),
661
	OPT_INCR('v', "verbose", &verbose,
662
		    "be more verbose (show counter open errors, etc)"),
663 664
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100)"),
665 666
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - dont start any counters"),
667 668
	OPT_BOOLEAN('B', "big-num", &big_num,
		    "print large numbers with thousands\' separators"),
669 670
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor in system-wide"),
671 672
	OPT_BOOLEAN('A', "no-aggr", &no_aggr,
		    "disable CPU count aggregation"),
673 674 675
	OPT_END()
};

676
int cmd_stat(int argc, const char **argv, const char *prefix __used)
677
{
678
	int status;
679
	int i,j;
680

681 682
	setlocale(LC_ALL, "");

683 684
	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);
685
	if (!argc && target_pid == -1 && target_tid == -1)
686
		usage_with_options(stat_usage, options);
687
	if (run_count <= 0)
688
		usage_with_options(stat_usage, options);
689

690 691 692 693
	/* no_aggr is for system-wide only */
	if (no_aggr && !system_wide)
		usage_with_options(stat_usage, options);

694 695 696 697 698
	/* Set attrs and nr_counters if no event is selected and !null_run */
	if (!null_run && !nr_counters) {
		memcpy(attrs, default_attrs, sizeof(default_attrs));
		nr_counters = ARRAY_SIZE(default_attrs);
	}
699

700
	if (system_wide)
701
		nr_cpus = read_cpu_map(cpu_list);
702 703
	else
		nr_cpus = 1;
704

705 706 707
	if (nr_cpus < 1)
		usage_with_options(stat_usage, options);

708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
	if (target_pid != -1) {
		target_tid = target_pid;
		thread_num = find_all_tid(target_pid, &all_tids);
		if (thread_num <= 0) {
			fprintf(stderr, "Can't find all threads of pid %d\n",
					target_pid);
			usage_with_options(stat_usage, options);
		}
	} else {
		all_tids=malloc(sizeof(pid_t));
		if (!all_tids)
			return -ENOMEM;

		all_tids[0] = target_tid;
		thread_num = 1;
	}

	for (i = 0; i < MAX_NR_CPUS; i++) {
		for (j = 0; j < MAX_COUNTERS; j++) {
			fd[i][j] = malloc(sizeof(int)*thread_num);
			if (!fd[i][j])
				return -ENOMEM;
		}
	}

I
Ingo Molnar 已提交
733 734 735 736 737 738
	/*
	 * We dont want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
739
	atexit(sig_atexit);
I
Ingo Molnar 已提交
740 741 742 743
	signal(SIGINT,  skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

744 745 746
	status = 0;
	for (run_idx = 0; run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
747
			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
748 749 750
		status = run_perf_stat(argc, argv);
	}

751 752
	if (status != -1)
		print_stat(argc, argv);
753 754

	return status;
755
}