/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
8 9
#define _FILE_OFFSET_BITS 64

10
#include "builtin.h"
11 12 13

#include "perf.h"

14
#include "util/build-id.h"
15
#include "util/util.h"
16
#include "util/parse-options.h"
17
#include "util/parse-events.h"
18

19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/symbol.h"
26
#include "util/cpumap.h"
27
#include "util/thread_map.h"
28

29
#include <unistd.h>
30
#include <sched.h>
31
#include <sys/mman.h>
32

33 34 35 36 37
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

38 39
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;
40

41
static unsigned int		page_size;
42
static unsigned int		mmap_pages			= UINT_MAX;
43
static unsigned int		user_freq 			= UINT_MAX;
44
static int			freq				=   1000;
45
static int			output;
46
static int			pipe_output			=      0;
47
static const char		*output_name			= NULL;
48
static bool			group				=  false;
49
static int			realtime_prio			=      0;
50
static bool			nodelay				=  false;
51
static bool			raw_samples			=  false;
52
static bool			sample_id_all_avail		=   true;
53
static bool			system_wide			=  false;
54
static pid_t			target_pid			=     -1;
55
static pid_t			target_tid			=     -1;
56
static pid_t			child_pid			=     -1;
57
static bool			no_inherit			=  false;
58
static enum write_mode_t	write_mode			= WRITE_FORCE;
59 60 61 62
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
63
static bool			sample_time			=  false;
64
static bool			no_buildid			=  false;
65
static bool			no_buildid_cache		=  false;
66
static struct perf_evlist	*evsel_list;
67 68 69

static long			samples				=      0;
static u64			bytes_written			=      0;
70

71
static int			file_new			=      1;
72
static off_t			post_processing_offset;
73

74
static struct perf_session	*session;
75
static const char		*cpu_list;
76
static const char               *progname;
77

78 79 80 81 82
/*
 * Add @size to the running byte total without writing anything.
 * __cmd_record() calls this with the byte count returned by
 * perf_event__synthesize_tracing_data(), which is handed the output
 * descriptor itself.
 */
static void advance_output(size_t size)
{
	bytes_written = bytes_written + size;
}

83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/*
 * Write all @size bytes starting at @buf to the output descriptor.
 *
 * write() may transfer fewer bytes than asked for, so loop until the whole
 * buffer is out.  A call interrupted by a signal (EINTR) is retried; any
 * other failure is fatal.  Every byte written is added to bytes_written.
 */
static void write_output(void *buf, size_t size)
{
	/* byte-typed cursor: arithmetic on void * is not standard C */
	const char *cursor = buf;

	while (size) {
		/* write() returns ssize_t; an int could truncate the count */
		ssize_t ret = write(output, cursor, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			die("failed to write");
		}

		size -= ret;
		cursor += ret;

		bytes_written += ret;
	}
}

98
static int process_synthesized_event(union perf_event *event,
99
				     struct perf_sample *sample __used,
100
				     struct perf_session *self __used)
101
{
102
	write_output(event, event->header.size);
103 104 105
	return 0;
}

106
static void mmap_read(struct perf_mmap *md)
107
{
108
	unsigned int head = perf_mmap__read_head(md);
109 110 111 112 113
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

114 115 116 117
	if (old == head)
		return;

	samples++;
118 119 120 121 122 123 124

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;
125

126
		write_output(buf, size);
127 128 129 130 131
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;
132

133
	write_output(buf, size);
134 135

	md->prev = old;
136
	perf_mmap__write_tail(md, old);
137 138 139
}

/* Flags written by sig_handler() and read by the rest of the file. */
static volatile int done;		/* set once any handled signal arrives */
static volatile int signr = -1;		/* number of the last signal, -1 if none */
static volatile int child_finished;	/* set when that signal was SIGCHLD */
142

143
static void sig_handler(int sig)
144
{
145 146 147
	if (sig == SIGCHLD)
		child_finished = 1;

148
	done = 1;
149 150 151 152 153
	signr = sig;
}

/*
 * atexit() handler.
 *
 * If a workload was forked, terminate it unless SIGCHLD was already seen,
 * reap it, and report the signal that killed it, if any.  Afterwards, when
 * perf itself was stopped by a signal other than SIGUSR1, restore that
 * signal's default action and re-send it to ourselves.
 */
static void sig_atexit(void)
{
	int status;

	if (child_pid > 0) {
		if (!child_finished)
			kill(child_pid, SIGTERM);

		/* status is only defined when wait() actually reaped a child */
		if (wait(&status) != -1 && WIFSIGNALED(status))
			psignal(WTERMSIG(status), progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

172 173 174 175
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */
176

177
	attr->disabled		= 1;
178
	attr->inherit		= !no_inherit;
179 180 181
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;
182

183
	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
184

185
	if (evlist->nr_entries > 1)
186 187
		attr->sample_type |= PERF_SAMPLE_ID;

188 189 190 191 192
	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
193
				     user_interval != ULLONG_MAX)) {
194 195 196 197 198 199 200
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
201
	}
202

203 204 205 206 207 208
	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

209
	if (sample_address) {
210
		attr->sample_type	|= PERF_SAMPLE_ADDR;
211 212
		attr->mmap_data = track;
	}
213

214 215 216
	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

A
Arun Sharma 已提交
217 218 219
	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

220 221
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
222 223
		attr->sample_type	|= PERF_SAMPLE_TIME;

I
Ingo Molnar 已提交
224
	if (raw_samples) {
225
		attr->sample_type	|= PERF_SAMPLE_TIME;
226
		attr->sample_type	|= PERF_SAMPLE_RAW;
I
Ingo Molnar 已提交
227 228
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}
229

230 231 232 233 234
	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

235 236
	attr->mmap		= track;
	attr->comm		= track;
237

238
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
239
		attr->disabled = 1;
240
		attr->enable_on_exec = 1;
241
	}
242
}
243

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
/*
 * Compare two event lists position by position.
 *
 * Returns true when both lists hold the same number of events and the
 * perf_event_attr of each event is byte-for-byte identical to the attr of
 * the event at the same position in the other list; false otherwise.
 */
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		/* length is the full attr; the != 0 test applies to the result */
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

263 264
static void open_counters(struct perf_evlist *evlist)
{
265
	struct perf_evsel *pos, *first;
266

267 268 269
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

270 271
	first = list_entry(evlist->entries.next, struct perf_evsel, node);

272 273
	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
274
		struct xyarray *group_fd = NULL;
275 276 277 278 279 280 281 282 283 284 285 286 287
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
288

289 290 291
		if (group && pos != first)
			group_fd = first->fd;

292 293 294 295
		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
296 297
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
				     group_fd) < 0) {
298 299
			int err = errno;

300 301 302 303
			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
304 305
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
306 307 308 309 310
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
311
				if (!sample_time && !raw_samples && !time_needed)
312 313
					attr->sample_type &= ~PERF_SAMPLE_TIME;

314
				goto retry_sample_id;
315
			}
316

317 318 319 320 321 322 323 324 325
			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
326 327
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
328 329 330 331
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}
332 333 334 335 336 337 338

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

339
			printf("\n");
340
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
341
			      err, strerror(err));
342 343

#if defined(__i386__) || defined(__x86_64__)
344 345 346 347 348
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
349 350
#endif

351
			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
L
Li Zefan 已提交
352 353
		}
	}
354

355 356 357 358 359 360
	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

361
	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
362 363
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

364 365 366 367 368 369 370 371 372 373
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
 	}

	perf_session__update_sample_type(session);
374 375
}

376 377 378 379
/*
 * Run the events recorded in this session, i.e. the file range from
 * post_processing_offset to the current file position, through
 * build_id__mark_dso_hit_ops.
 *
 * Returns 0 when the file is empty, -1 when the file position cannot be
 * determined, otherwise the result of __perf_session__process_events().
 */
static int process_buildids(void)
{
	/* off_t keeps lseek()'s -1 error value recognisable */
	off_t size = lseek(output, 0, SEEK_CUR);

	if (size < 0)
		return -1;

	if (size == 0)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      size - post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

389 390
static void atexit_header(void)
{
391 392
	if (!pipe_output) {
		session->header.data_size += bytes_written;
393

394 395
		if (!no_buildid)
			process_buildids();
396
		perf_session__write_header(session, evsel_list, output, true);
397
		perf_session__delete(session);
398
		perf_evlist__delete(evsel_list);
399
		symbol__exit();
400
	}
401 402
}

403
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
404 405
{
	int err;
406
	struct perf_session *psession = data;
407

408
	if (machine__is_host(machine))
409 410 411 412 413 414 415 416 417 418
		return;

	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
419 420
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
421 422
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
423
		       " relocation symbol.\n", machine->pid);
424 425 426 427 428

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
429 430
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
431
	if (err < 0)
432 433 434
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
435 436
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
437
		       " relocation symbol.\n", machine->pid);
438 439
}

440 441 442 443 444 445 446
/*
 * Header-only PERF_RECORD_FINISHED_ROUND record.  mmap_read_all() writes it
 * to the output after each pass over the mmap buffers when the
 * HEADER_TRACE_INFO feature is set in the session header.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void mmap_read_all(void)
{
447
	int i;
448

449
	for (i = 0; i < evsel_list->nr_mmaps; i++) {
450 451
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
452 453 454 455 456 457
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

458
static int __cmd_record(int argc, const char **argv)
459
{
I
Ingo Molnar 已提交
460 461
	struct stat st;
	int flags;
462
	int err;
463
	unsigned long waking = 0;
464
	int child_ready_pipe[2], go_pipe[2];
465
	const bool forks = argc > 0;
466
	char buf;
467
	struct machine *machine;
468

469 470
	progname = argv[0];

471 472
	page_size = sysconf(_SC_PAGE_SIZE);

473 474 475
	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
476
	signal(SIGUSR1, sig_handler);
477

478
	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
479 480 481 482
		perror("failed to create pipes");
		exit(-1);
	}

483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			write_mode = WRITE_FORCE;
502
		}
503 504
	}

505
	flags = O_CREAT|O_RDWR;
506
	if (write_mode == WRITE_APPEND)
507
		file_new = 0;
I
Ingo Molnar 已提交
508 509 510
	else
		flags |= O_TRUNC;

511 512 513 514
	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
515 516 517 518 519
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

520
	session = perf_session__new(output_name, O_WRONLY,
521
				    write_mode == WRITE_FORCE, false, NULL);
522
	if (session == NULL) {
523 524 525 526
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

527 528 529
	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

530
	if (!file_new) {
531
		err = perf_session__read_header(session, output);
532
		if (err < 0)
533
			goto out_delete_session;
534 535
	}

536
	if (have_tracepoints(&evsel_list->entries))
537
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
538

539 540 541 542 543 544 545 546 547 548 549 550 551
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

552 553 554 555
	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

556
	if (forks) {
557
		child_pid = fork();
558
		if (child_pid < 0) {
559 560 561
			perror("failed to fork");
			exit(-1);
		}
562

563
		if (!child_pid) {
564 565
			if (pipe_output)
				dup2(2, 1);
566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
591
			kill(getppid(), SIGUSR1);
592
			exit(-1);
593
		}
594

595
		if (!system_wide && target_tid == -1 && target_pid == -1)
596
			evsel_list->threads->map[0] = child_pid;
597

598 599 600 601 602 603 604 605 606 607 608 609
		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

610
	open_counters(evsel_list);
611

612 613 614 615 616
	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

617 618 619 620 621
	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
622 623
		err = perf_session__write_header(session, evsel_list,
						 output, false);
624 625
		if (err < 0)
			return err;
626 627
	}

628 629
	post_processing_offset = lseek(output, 0, SEEK_CUR);

630
	if (pipe_output) {
631 632
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
633 634 635 636
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
637

638 639
		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
640 641 642 643
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}
644

645
		if (have_tracepoints(&evsel_list->entries)) {
646 647 648 649 650 651 652 653
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
654 655 656
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
657 658 659 660
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
661
			advance_output(err);
662
		}
663 664
	}

665 666
	machine = perf_session__find_host_machine(session);
	if (!machine) {
667 668 669 670
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

671 672
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
673
	if (err < 0)
674 675
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
676 677 678 679
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");
680

681 682
	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
683 684 685 686 687
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

688
	if (perf_guest)
689 690
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);
691

692
	if (!system_wide)
693 694 695
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
696
	else
697 698
		perf_event__synthesize_threads(process_synthesized_event,
					       session);
699

700 701 702 703 704
	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
705
			pr_err("Could not set realtime priority.\n");
706 707 708 709
			exit(-1);
		}
	}

710 711
	perf_evlist__enable(evsel_list);

712 713 714
	/*
	 * Let the child rip
	 */
715 716
	if (forks)
		close(go_pipe[1]);
717

718
	for (;;) {
719
		int hits = samples;
720

721
		mmap_read_all();
722

723 724 725
		if (hits == samples) {
			if (done)
				break;
726
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
727 728 729
			waking++;
		}

730 731
		if (done)
			perf_evlist__disable(evsel_list);
732 733
	}

734
	if (quiet || signr == SIGUSR1)
735 736
		return 0;

737 738
	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

739 740 741 742
	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
743
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
744 745 746
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);
747

748
	return 0;
749 750 751 752

out_delete_session:
	perf_session__delete(session);
	return err;
753
}
754 755

/* NULL-terminated usage lines handed to the option parser */
static const char * const record_usage[] = {
	[0] = "perf record [<options>] [<command>]",
	[1] = "perf record [<options>] -- <command> [<options>]",
	[2] = NULL
};

761 762
static bool force, append_file;

763
const struct option record_options[] = {
764
	OPT_CALLBACK('e', "event", &evsel_list, "event",
765
		     "event selector. use 'perf list' to list available events",
766
		     parse_events_option),
767
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
L
Li Zefan 已提交
768
		     "event filter", parse_filter),
769
	OPT_INTEGER('p', "pid", &target_pid,
770 771 772
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
773 774
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
775 776
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
777 778
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
779 780
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
I
Ingo Molnar 已提交
781 782
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
783 784
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
785
	OPT_BOOLEAN('f', "force", &force,
786
			"overwrite existing data file (deprecated)"),
787
	OPT_U64('c', "count", &user_interval, "event period to sample"),
I
Ingo Molnar 已提交
788 789
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
790 791
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
792 793
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
794 795
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
796 797
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
798
	OPT_INCR('v', "verbose", &verbose,
799
		    "be more verbose (show counter open errors, etc)"),
800
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
801 802
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
803 804
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
805
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
806 807
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
808
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
809
		    "do not update the buildid cache"),
810 811
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
S
Stephane Eranian 已提交
812 813 814
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
815 816 817
	OPT_END()
};

818
/*
 * Entry point of 'perf record'.
 *
 * Parses the options, validates their combination, prepares the event list
 * (adding the default event when none was given), its cpu/thread maps and
 * per-event file descriptor storage, settles the sampling period or
 * frequency, and hands over to __cmd_record().
 *
 * Returns the result of __cmd_record(), -ENOMEM when setup runs out of
 * memory, or -EINVAL when both frequency and count are zero.  Invalid usage
 * is reported through usage_with_options().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	bool no_target;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (!evsel_list)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);

	/* nothing to run and nothing existing to attach to */
	no_target = target_pid == -1 && target_tid == -1 &&
		    !system_wide && !cpu_list;
	if (!argc && no_target)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else {
		write_mode = append_file ? WRITE_APPEND : WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* -p is handled as a tid target */
	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	if (!default_interval && !freq) {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else
		default_interval = freq;

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}