builtin-record.c 22.5 KB
Newer Older
I
Ingo Molnar 已提交
1
/*
2 3 4 5 6
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
I
Ingo Molnar 已提交
7
 */
8 9
#define _FILE_OFFSET_BITS 64

10
#include "builtin.h"
11 12 13

#include "perf.h"

14
#include "util/build-id.h"
15
#include "util/util.h"
16
#include "util/parse-options.h"
17
#include "util/parse-events.h"
18

19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/symbol.h"
26
#include "util/cpumap.h"
27
#include "util/thread_map.h"
28

29
#include <unistd.h>
30
#include <sched.h>
31
#include <sys/mman.h>
32

33 34
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

35 36 37 38 39
/* How to treat an existing perf.data file. */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite (old file renamed to *.old first) */
	WRITE_APPEND	/* append to existing file (-A) */
};

/* Sampling period/frequency requested by the user (-c / -F). */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;
/* fd of the perf.data output (or STDOUT_FILENO when piping). */
static int			output;
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static int			group				=      0;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time if the running kernel lacks attr.sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
/* Workload child forked by __cmd_record(); killed from sig_atexit(). */
static pid_t			child_pid			=     -1;
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Bookkeeping for the final "Captured and wrote ..." summary. */
static long			samples				=      0;
static u64			bytes_written			=      0;

/* 0 when appending to an existing perf.data (WRITE_APPEND). */
static int			file_new			=      1;
/* File offset where sample data starts; used by process_buildids(). */
static off_t			post_processing_offset;

static struct perf_session	*session;
static const char		*cpu_list;

79 80 81 82 83
/*
 * Account for 'size' bytes that some helper already wrote to the output
 * fd directly (e.g. synthesized tracing data), so the header's data_size
 * and the final summary stay accurate.
 */
static void advance_output(size_t size)
{
	bytes_written += size;
}

84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/*
 * Write the whole buffer to the output fd, looping over partial writes,
 * and account the bytes in bytes_written. Dies on unrecoverable errors.
 *
 * Fix: this file installs handlers for SIGCHLD/SIGINT/SIGUSR1, so a
 * write() may legitimately fail with EINTR (e.g. when the workload child
 * exits); previously that aborted the whole record via die() instead of
 * simply retrying.
 */
static void write_output(void *buf, size_t size)
{
	while (size) {
		int ret = write(output, buf, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;	/* interrupted by a signal: retry */
			die("failed to write");
		}

		size -= ret;
		buf += ret;

		bytes_written += ret;
	}
}

99
static int process_synthesized_event(union perf_event *event,
100
				     struct perf_sample *sample __used,
101
				     struct perf_session *self __used)
102
{
103
	write_output(event, event->header.size);
104 105 106
	return 0;
}

107
/*
 * Drain one kernel mmap ring buffer into the output file.
 *
 * The region between md->prev (our last read position) and the kernel's
 * data_head is copied out; when the span wraps around the end of the
 * ring it is written as two pieces. Finally the tail is published back
 * to the kernel so it can reuse the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size; /* data starts after the control page */
	unsigned long size;
	void *buf;

	/* Nothing new since last time. */
	if (old == head)
		return;

	samples++;

	size = head - old;

	/* Wrapped: write the chunk up to the end of the ring first. */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* Remaining (or only) chunk from the start of the ring. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	/* Tell the kernel we consumed up to 'old' (includes memory barrier). */
	perf_mmap__write_tail(md, old);
}

/* Set from the signal handler; polled by the main loop in __cmd_record(). */
static volatile int done = 0;
/* Last signal received, or -1; consumed by sig_atexit() to re-raise. */
static volatile int signr = -1;

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: just record what happened.
 * Kept to two flag stores so it stays async-signal-safe.
 */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}

/*
 * atexit() hook: terminate the forked workload (if any), then re-raise
 * the fatal signal with default disposition so our own exit status
 * correctly reflects death-by-signal.
 */
static void sig_atexit(void)
{
	if (child_pid > 0)
		kill(child_pid, SIGTERM);

	/* Normal exit, or SIGUSR1 (child's exec failed): no re-raise. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

161 162 163 164
/*
 * Fill in the perf_event_attr for one event according to the record
 * options. Note the *order* matters: later clauses (raw_samples,
 * no_samples, ...) deliberately override bits set earlier.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With multiple events, samples need an ID to be demultiplexed. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			/* Frequency mode: kernel adjusts the period, so record it. */
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/*
	 * Timestamps are wanted whenever samples from different contexts
	 * may interleave — but only if the kernel supports sample_id_all
	 * (open_counters() clears the flag and retries on old kernels).
	 */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	/* -D: wake the reader on every event instead of buffering. */
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * When we fork the workload ourselves, start the counter disabled
	 * and let exec() enable it, so we don't profile perf's own setup.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
231

232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/*
 * Check whether two event lists carry pairwise-identical attrs, used to
 * validate that an appended record (-A) is compatible with the existing
 * perf.data.
 *
 * Fix: the "!= 0" was previously inside memcmp()'s size argument —
 * "sizeof(pos->attr) != 0" evaluates to 1, so only the first byte of
 * each attr was compared and incompatible appends went undetected.
 */
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

251 252 253 254
/*
 * Open one perf event fd per (event, cpu, thread), with graceful
 * degradation for older kernels:
 *   - EINVAL + sample_id_all  -> retry without sample_id_all
 *   - cycles unsupported      -> fall back to the cpu-clock sw event
 * Then set event filters, mmap the ring buffers, and attach/validate
 * the evlist against the session. Dies/exits on unrecoverable errors.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A dummy CPU map (map[0] < 0) means per-thread mode: no inherit. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				/* Drop TIME again only if nobody actually asked for it. */
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			/* No recovery possible: report and die. */
			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		/* Appending: the new events must match what's in the file. */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
 	}

	perf_session__update_sample_type(session);
}

357 358 359 360
/*
 * Post-process the recorded sample data to mark which DSOs were hit,
 * so only their build-ids get written into the header. Returns 0 when
 * there is nothing to process, otherwise the processing result.
 */
static int process_buildids(void)
{
	u64 data_end = lseek(output, 0, SEEK_CUR);

	if (!data_end)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      data_end - post_processing_offset,
					      data_end, &build_id__mark_dso_hit_ops);
}

370 371
/*
 * atexit() hook: finalize the perf.data header (data size, build-ids)
 * and tear down the session. Skipped entirely in pipe mode, where the
 * header was already streamed. Teardown order matters: build-ids need
 * the session, the header write needs both session and evsel_list.
 */
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}

384
/*
 * Per-machine callback (see perf_session__process_machines): synthesize
 * module and kernel mmap events for each *guest* machine so its symbols
 * can be resolved at report time. Host machines are skipped. Failures
 * are reported but non-fatal.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

421 422 423 424 425 426 427
/*
 * Marker event written after each full pass over the mmaps, so the
 * report side knows it may flush/reorder everything seen so far.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

/*
 * Drain every mapped ring buffer once, then (for tracepoint sessions)
 * emit a FINISHED_ROUND marker.
 */
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

439
/*
 * The record main path:
 *   1. install signal/atexit handlers and set up the output file;
 *   2. create the perf session (new file, append, or pipe);
 *   3. fork the workload stopped on a pipe read (if a command was given);
 *   4. open counters and write/stream the header;
 *   5. synthesize kernel/module/thread mmap events;
 *   6. loop draining the mmap buffers until done, then print a summary.
 * Returns 0 on success or a negative error.
 */
static int __cmd_record(int argc, const char **argv)
{
	int i;
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;	/* remaining argv is the workload */
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);	/* raised by child if exec fails */

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* Resolve the output target: explicit file, "-", stdout-pipe, or perf.data. */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Existing non-empty file: keep a *.old backup on overwrite. */
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: fall back to a fresh file. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* Appending: load the existing header so we can validate compatibility. */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: stdout belongs to the data stream in pipe mode. */
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			/* exec failed: tell the parent via SIGUSR1 (quiet exit). */
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		/* Profile the child we just forked, unless a target was given. */
		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	/* Everything after this offset is sample data (see process_buildids). */
	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/* Pipe mode: attrs/event types/tracing data must be streamed inline. */
	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* Host kernel map: try _text, fall back to _stext (see guest variant). */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	for (;;) {
		int hits = samples;	/* snapshot to detect an idle pass */
		int thread;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			/* NOTE(review): poll() errors (incl. EINTR) are ignored here. */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * Interrupted: disable all counters, then loop once more to
		 * drain whatever is still in the buffers before breaking.
		 */
		if (done) {
			for (i = 0; i < evsel_list->cpus->nr; i++) {
				struct perf_evsel *pos;

				list_for_each_entry(pos, &evsel_list->entries, node) {
					for (thread = 0;
						thread < evsel_list->threads->nr;
						thread++)
						ioctl(FD(pos, i, thread),
							PERF_EVENT_IOC_DISABLE);
				}
			}
		}
	}

	/* SIGUSR1 means the child's exec failed; it already printed an error. */
	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
731 732

/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Raw -f/-A flags; resolved into write_mode by cmd_record(). */
static bool force, append_file;

740
/* Command-line option table for 'perf record' (consumed by parse_options). */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

793
/*
 * Entry point for 'perf record': parse and validate options, build the
 * event list and its cpu/thread maps, resolve sampling frequency/period,
 * then hand off to __cmd_record(). Returns 0 or a negative errno-style
 * code.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need something to record: a command, a pid/tid, -a, or a cpu list. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	/* -f and -A are mutually exclusive; resolve them into write_mode. */
	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event (cycles). */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}