/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
8 9
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <errno.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

33 34 35 36 37
/* How to treat an already-existing output file: clobber it or append to it. */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

/* Sample period (-c) / frequency (-F) requested on the command line;
 * the sentinel values mean "not specified by the user". */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;
static int			output;				/* output file descriptor */
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time when the running kernel lacks attr->sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Counters for the end-of-run summary and the header size fixup. */
static long			samples				=      0;
static u64			bytes_written			=      0;

/* file_new == 0 when appending to an existing perf.data (-A). */
static int			file_new			=      1;
static off_t			post_processing_offset;

static struct perf_session	*session;
static const char		*cpu_list;
77 78 79 80 81
/*
 * Account for @size bytes that were written to the output fd by some
 * other path (e.g. perf_event__synthesize_tracing_data() writes directly),
 * so the data_size fixed up in atexit_header() stays correct.
 */
static void advance_output(size_t size)
{
	bytes_written += size;
}

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
/*
 * Write all @size bytes of @buf to the output file, looping over short
 * writes, and account them in bytes_written.  Dies on unrecoverable
 * write errors.
 */
static void write_output(void *buf, size_t size)
{
	while (size) {
		int ret = write(output, buf, size);

		if (ret < 0) {
			/*
			 * SIGCHLD/SIGINT/SIGUSR1 handlers are installed, so
			 * write() may legitimately be interrupted: retry on
			 * EINTR instead of dying and losing buffered data.
			 */
			if (errno == EINTR)
				continue;
			die("failed to write");
		}

		size -= ret;
		buf += ret;

		bytes_written += ret;
	}
}

97
static int process_synthesized_event(union perf_event *event,
98
				     struct perf_sample *sample __used,
99
				     struct perf_session *self __used)
100
{
101
	write_output(event, event->header.size);
102 103 104
	return 0;
}

105
/*
 * Drain one per-cpu/thread mmap ring buffer: copy everything between our
 * last read position (md->prev) and the kernel's current head straight to
 * the output file, then advance the tail so the kernel can reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size; /* data follows the control page */
	unsigned long size;
	void *buf;

	/* Nothing new in the ring buffer. */
	if (old == head)
		return;

	samples++;

	size = head - old;

	/*
	 * If the new data wraps around the end of the ring buffer, write
	 * the chunk up to the end first...
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* ...then the (possibly wrapped) remainder from the start. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	/* Publish the new tail so the kernel knows the data was consumed. */
	perf_mmap__write_tail(md, old);
}

/* Set from signal context; polled by the main record loop. */
static volatile int done = 0;
/* Which signal terminated us, or -1 if none (normal exit). */
static volatile int signr = -1;

/* Common handler for SIGCHLD/SIGINT/SIGUSR1: request loop termination. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}

/*
 * atexit() hook: make sure the forked workload is terminated, and if we
 * are exiting because of a fatal signal, re-raise it with the default
 * disposition so the parent shell sees the real termination status.
 */
static void sig_atexit(void)
{
	if (child_pid > 0)
		kill(child_pid, SIGTERM);

	/* SIGUSR1 means the child failed to exec; treat as a normal exit. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

159 160 161 162
/*
 * Fill in the perf_event_attr for one event according to the command-line
 * options, before the event is opened in open_counters().
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With multiple events, samples must carry an ID to be demuxed. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/* Timestamps are needed whenever events from several sources
	 * (inherited children, multiple cpus) must be merged in order. */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D/--no-delay: wake up the reader on every single event. */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/* When forking the workload ourselves, start the counters disabled
	 * and let exec() enable them, so we don't profile perf itself. */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
229

230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
/*
 * Return true iff both evlists describe the same events with identical
 * attributes, in the same order.  Used to reject an incompatible append
 * (-A) onto an existing perf.data file.
 */
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		/*
		 * Fix: the "!= 0" used to sit *inside* the memcmp() size
		 * argument -- sizeof(pos->attr) != 0 evaluates to 1, so
		 * only the first byte of the attrs was ever compared.
		 */
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

249 250 251 252
/*
 * Open all events in the evlist on the configured cpus/threads, with
 * graceful fallbacks for older kernels (no sample_id_all) and machines
 * without a cycles PMU event (fall back to the cpu-clock software event).
 * Then apply event filters and mmap the ring buffers.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A per-task (cpu == -1) context cannot use inheritance here. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	/* When appending, the event setup must match the existing file. */
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

355 356 357 358
/*
 * Post-process the events already written to the output file, marking the
 * DSOs that were actually hit by samples so their build-ids can be stored
 * in the perf.data header.  Returns 0 when there is nothing to scan.
 */
static int process_buildids(void)
{
	u64 file_size = lseek(output, 0, SEEK_CUR);

	if (!file_size)
		return 0;

	session->fd = output;

	return __perf_session__process_events(session, post_processing_offset,
					      file_size - post_processing_offset,
					      file_size,
					      &build_id__mark_dso_hit_ops);
}

368 369
/*
 * atexit() hook: patch the final data size into the file header, collect
 * build-ids, rewrite the header in place and tear everything down.  Not
 * possible when streaming to a pipe (can't seek back).
 */
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}

382
/*
 * Per-machine callback: synthesize module and kernel mmap events for each
 * guest machine so guest-side samples can be resolved.  @data is the
 * perf_session being recorded into; the host machine is skipped.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

419 420 421 422 423 424 425
/* Marker event flushed after each pass over all ring buffers, letting the
 * report side re-order events only within a "round". */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

/* Drain every mmap'ed ring buffer once, then emit the round marker
 * (only needed when tracepoint data is being recorded). */
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

437
/*
 * The record session proper: set up the output file and perf_session,
 * optionally fork the workload (held on a pipe until counters are open),
 * synthesize the initial kernel/module/thread events, then loop draining
 * the ring buffers until the workload exits or we get a signal.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;	/* a command to run was given */
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* Work out where the output goes: stdout pipe, "-", or a file
	 * (defaulting to perf.data, with existing data rotated to .old). */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: behave like a fresh record. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: don't pollute the data stream with stdout. */
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			/* SIGUSR1 tells the parent the exec failed. */
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/* Pipe mode has no seekable header: stream the attr/event-type/
	 * tracing metadata as in-band events instead. */
	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* Synthesize kernel/module mmaps so samples can be symbolized. */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	/* Main loop: drain ring buffers, sleep in poll() while idle, and
	 * after 'done' do one last drain with the events disabled. */
	for (;;) {
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
716 717

/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

723 724
/* -f and -A flags; their interaction is resolved in cmd_record(). */
static bool force, append_file;

/* Command-line option table for 'perf record'. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

780
/*
 * Entry point for 'perf record': parse and validate options, build the
 * event list and its cpu/thread maps, resolve the sampling period vs.
 * frequency, then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need something to record: a command, pid/tid, -a or a cpu list. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event (cycles). */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}