/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

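/*
 * What to do with an existing perf.data file: overwrite it (WRITE_FORCE,
 * the default) or, when -A is given, append to it (WRITE_APPEND).
 */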
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

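/*
 * write(2) may do a short write, so keep looping until the whole
 * buffer has been flushed to the output file.
 */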
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}

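/*
 * Drain one mmap'd ring buffer: copy everything between our last read
 * position (md->prev) and the kernel's current write position to the
 * output file, in two chunks when the data wraps around the end of the
 * buffer, then publish the new tail so the kernel can reuse the space.
 */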
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

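/*
 * on_exit() handler: reap the forked workload if it is still running,
 * then re-raise the fatal signal with its default disposition so perf
 * record itself terminates with the expected exit status.
 */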
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

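/*
 * When appending to an existing file (-A/WRITE_APPEND), the events
 * being recorded must match, attribute for attribute, the ones already
 * described in the file's header.
 */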
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short-term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		if (opts->group && pos != first)
			group_fd = first->fd;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && opts->sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_all field
				 */
				opts->sample_id_all_avail = false;
				if (!opts->sample_time && !opts->raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

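/*
 * Re-read the events just written with build_id__mark_dso_hit_ops, so
 * that only DSOs that actually got samples end up with their build-ids
 * in the perf.data header.
 */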
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because guest
	 * module symbols are loaded from guest kallsyms by default
	 * instead of from /lib/modules/XXX/XXX. This avoids missing
	 * symbols when the first sampled address falls in a module
	 * rather than in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s module"
		       " information.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text symbol.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

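/*
 * Emitted after each pass over the mmaps when tracing data is present;
 * the report side can use these markers as flush points when re-sorting
 * events into time order.
 */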
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base)
			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
		write_output(rec, &finished_round_event, sizeof(finished_round_event));
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	if (!rec->no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

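	/* Describe the environment the profile is being taken in. */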
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

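	/*
	 * Synthesize events for state that predates this session and so
	 * will never appear in the ring buffers: kernel and module maps,
	 * guest machines and the threads being monitored.
	 */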
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

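	/*
	 * Main capture loop: drain all ring buffers and, when a pass finds
	 * no new samples, sleep in poll() until the kernel signals more
	 * data or a signal handler sets 'done'.
	 */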
	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.target_pid	     = -1,
		.target_tid	     = -1,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 1000,
		.sample_id_all_avail = true,
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &record.opts.target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &record.opts.target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
		!rec->opts.system_wide && !rec->opts.cpu_list)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target_pid != -1)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}
803
	err = __cmd_record(&record, argc, argv);
804
out_free_fd:
805
	perf_evlist__delete_maps(evsel_list);
806 807
out_symbol_exit:
	symbol__exit();
808
	return err;
809
}