builtin-record.c 24.1 KB
Newer Older
I
Ingo Molnar 已提交
1
/*
2 3 4 5 6
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
I
Ingo Molnar 已提交
7
 */
8
#include "builtin.h"
9 10 11

#include "perf.h"

12
#include "util/build-id.h"
13
#include "util/util.h"
14
#include "util/parse-options.h"
15
#include "util/parse-events.h"
16

17
#include "util/callchain.h"
18
#include "util/cgroup.h"
19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/tool.h"
26
#include "util/symbol.h"
27
#include "util/cpumap.h"
28
#include "util/thread_map.h"
29
#include "util/data.h"
30

31
#include <unistd.h>
32
#include <sched.h>
33
#include <sys/mman.h>
34

35

36
struct record {
37
	struct perf_tool	tool;
38
	struct record_opts	opts;
39
	u64			bytes_written;
40
	struct perf_data_file	file;
41 42 43 44 45 46 47
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
48
};
49

50
static int record__write(struct record *rec, void *bf, size_t size)
51
{
52
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
53 54
		pr_err("failed to write perf data, error: %m\n");
		return -1;
55
	}
56

57
	rec->bytes_written += size;
58
	return 0;
59 60
}

61
/*
 * Callback passed to the perf_event__synthesize_*() helpers: writes the
 * synthesized @event, header.size bytes long, straight to the output file
 * of the record that owns @tool.
 *
 * Returns whatever record__write() returns (0 or -1).
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *owner = container_of(tool, struct record, tool);
	const size_t len = event->header.size;

	return record__write(owner, event, len);
}

70
static int record__mmap_read(struct record *rec, int idx)
71
{
72
	struct perf_mmap *md = &rec->evlist->mmap[idx];
73
	unsigned int head = perf_mmap__read_head(md);
74
	unsigned int old = md->prev;
J
Jiri Olsa 已提交
75
	unsigned char *data = md->base + page_size;
76 77
	unsigned long size;
	void *buf;
78
	int rc = 0;
79

80
	if (old == head)
81
		return 0;
82

83
	rec->samples++;
84 85 86 87 88 89 90

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;
91

92
		if (record__write(rec, buf, size) < 0) {
93 94 95
			rc = -1;
			goto out;
		}
96 97 98 99 100
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;
101

102
	if (record__write(rec, buf, size) < 0) {
103 104 105
		rc = -1;
		goto out;
	}
106 107

	md->prev = old;
108
	perf_evlist__mmap_consume(rec->evlist, idx);
109 110
out:
	return rc;
111 112 113
}

/*
 * Flags shared between the signal handlers and the main record loop.
 *
 * They are stored to from asynchronous signal context, so they are declared
 * volatile sig_atomic_t: that is the only object type the C standard
 * guarantees a signal handler may safely write.
 */
static volatile sig_atomic_t done = 0;			/* recording should stop */
static volatile sig_atomic_t signr = -1;		/* signal to re-raise at exit, -1 if none */
static volatile sig_atomic_t child_finished = 0;	/* the workload no longer needs a SIGTERM */

/*
 * Handler for the signals that end a recording.
 *
 * SIGCHLD only marks the child as finished; any other signal is remembered
 * in signr so record__sig_exit() can re-raise it.  In both cases the main
 * loop is asked to stop through 'done'.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

127
static void record__sig_exit(void)
128
{
129
	if (signr == -1)
130 131 132
		return;

	signal(signr, SIG_DFL);
133
	raise(signr);
134 135
}

136
static int record__open(struct record *rec)
137
{
138
	char msg[512];
139
	struct perf_evsel *pos;
140 141
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
142
	struct record_opts *opts = &rec->opts;
143
	int rc = 0;
144

145
	perf_evlist__config(evlist, opts);
146

147
	evlist__for_each(evlist, pos) {
148
try_again:
149
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
150
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
151
				if (verbose)
152
					ui__warning("%s\n", msg);
153 154
				goto try_again;
			}
155

156 157 158 159
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
160
			goto out;
L
Li Zefan 已提交
161 162
		}
	}
163

164
	if (perf_evlist__apply_filters(evlist)) {
165
		error("failed to set filter with %d (%s)\n", errno,
166
			strerror_r(errno, msg, sizeof(msg)));
167 168
		rc = -1;
		goto out;
169 170
	}

171
	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
172 173 174 175 176
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
177
			       "(current value: %u)\n", opts->mmap_pages);
178 179
			rc = -errno;
		} else {
180 181
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
182 183 184
			rc = -errno;
		}
		goto out;
185
	}
186

187
	session->evlist = evlist;
188
	perf_session__set_id_hdr_size(session);
189 190
out:
	return rc;
191 192
}

193 194 195 196 197 198 199 200 201 202 203 204 205
/*
 * Sample callback used while the recorded data is re-read for build-id
 * processing: counts the sample in rec->samples and lets
 * build_id__mark_dso_hit() handle it, returning that call's result.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *owner = container_of(tool, struct record, tool);

	owner->samples += 1;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

206
static int process_buildids(struct record *rec)
207
{
208 209
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
210

211
	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
212 213 214
	if (size == 0)
		return 0;

215 216
	file->size = size;

217 218 219 220 221 222 223 224 225 226 227
	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

228
	return perf_session__process_events(session, &rec->tool);
229 230
}

231
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
232 233
{
	int err;
234
	struct perf_tool *tool = data;
235 236 237 238 239 240 241 242
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
243
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
244
					     machine);
245 246
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
247
		       " relocation symbol.\n", machine->pid);
248 249 250 251 252

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
253
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
254
						 machine);
255 256
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
257
		       " relocation symbol.\n", machine->pid);
258 259
}

260 261 262 263 264
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

265
static int record__mmap_read_all(struct record *rec)
266
{
267
	u64 bytes_written = rec->bytes_written;
268
	int i;
269
	int rc = 0;
270

271
	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
272
		if (rec->evlist->mmap[i].base) {
273
			if (record__mmap_read(rec, i) != 0) {
274 275 276 277
				rc = -1;
				goto out;
			}
		}
278 279
	}

280 281 282 283 284 285
	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
286 287 288

out:
	return rc;
289 290
}

291
static void record__init_features(struct record *rec)
292 293 294 295 296 297 298 299 300 301
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

302
	if (!have_tracepoints(&rec->evlist->entries))
303 304 305 306 307 308
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

309 310 311 312 313 314 315
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
316 317
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
318 319 320 321 322 323 324
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

325
static int __cmd_record(struct record *rec, int argc, const char **argv)
326
{
327
	int err;
328
	int status = 0;
329
	unsigned long waking = 0;
330
	const bool forks = argc > 0;
331
	struct machine *machine;
332
	struct perf_tool *tool = &rec->tool;
333
	struct record_opts *opts = &rec->opts;
334
	struct perf_data_file *file = &rec->file;
335
	struct perf_session *session;
336
	bool disabled = false, draining = false;
337
	int fd;
338

339
	rec->progname = argv[0];
340

341
	atexit(record__sig_exit);
342 343
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
344
	signal(SIGTERM, sig_handler);
345

346
	session = perf_session__new(file, false, NULL);
347
	if (session == NULL) {
A
Adrien BAK 已提交
348
		pr_err("Perf session creation failed.\n");
349 350 351
		return -1;
	}

352
	fd = perf_data_file__fd(file);
353 354
	rec->session = session;

355
	record__init_features(rec);
356

357
	if (forks) {
358
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
359
						    argv, file->is_pipe,
360
						    workload_exec_failed_signal);
361 362
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
363
			status = err;
364
			goto out_delete_session;
365 366 367
		}
	}

368
	if (record__open(rec) != 0) {
369
		err = -1;
370
		goto out_child;
371
	}
372

373
	if (!rec->evlist->nr_groups)
374 375
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

376
	if (file->is_pipe) {
377
		err = perf_header__write_pipe(fd);
378
		if (err < 0)
379
			goto out_child;
380
	} else {
381
		err = perf_session__write_header(session, rec->evlist, fd, false);
382
		if (err < 0)
383
			goto out_child;
384 385
	}

386
	if (!rec->no_buildid
387
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
388
		pr_err("Couldn't generate buildids. "
389
		       "Use --no-buildid to profile anyway.\n");
390
		err = -1;
391
		goto out_child;
392 393
	}

394
	machine = &session->machines.host;
395

396
	if (file->is_pipe) {
397
		err = perf_event__synthesize_attrs(tool, session,
398
						   process_synthesized_event);
399 400
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
401
			goto out_child;
402
		}
403

404
		if (have_tracepoints(&rec->evlist->entries)) {
405 406 407 408 409 410 411 412
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
413
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
414
								  process_synthesized_event);
415 416
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
417
				goto out_child;
418
			}
419
			rec->bytes_written += err;
420
		}
421 422
	}

423
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
424
						 machine);
425 426 427 428
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");
429

430
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
431
					     machine);
432 433 434 435 436
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

437
	if (perf_guest) {
438 439
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
440
	}
441

442
	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
443
					    process_synthesized_event, opts->sample_address);
444
	if (err != 0)
445
		goto out_child;
446

447
	if (rec->realtime_prio) {
448 449
		struct sched_param param;

450
		param.sched_priority = rec->realtime_prio;
451
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
452
			pr_err("Could not set realtime priority.\n");
453
			err = -1;
454
			goto out_child;
455 456 457
		}
	}

458 459 460 461 462
	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
463
	if (!target__none(&opts->target) && !opts->initial_delay)
464
		perf_evlist__enable(rec->evlist);
465

466 467 468
	/*
	 * Let the child rip
	 */
469
	if (forks)
470
		perf_evlist__start_workload(rec->evlist);
471

472 473 474 475 476
	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

477
	for (;;) {
478
		int hits = rec->samples;
479

480
		if (record__mmap_read_all(rec) < 0) {
481
			err = -1;
482
			goto out_child;
483
		}
484

485
		if (hits == rec->samples) {
486
			if (done || draining)
487
				break;
488
			err = perf_evlist__poll(rec->evlist, -1);
489 490 491 492 493
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
494
				err = 0;
495
			waking++;
496 497 498

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
499 500
		}

501 502 503 504 505
		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
506
		if (done && !disabled && !target__none(&opts->target)) {
507
			perf_evlist__disable(rec->evlist);
508 509
			disabled = true;
		}
510 511
	}

512
	if (forks && workload_exec_errno) {
513
		char msg[STRERR_BUFSIZE];
514 515 516
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
517
		goto out_child;
518 519
	}

520
	if (!quiet)
521
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
522

523 524 525
out_child:
	if (forks) {
		int exit_status;
526

527 528 529 530 531 532 533 534 535 536 537 538 539 540
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

541 542 543
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

544 545 546 547 548
	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
549
		perf_session__write_header(rec->session, rec->evlist, fd, true);
550
	}
551

552 553 554 555 556 557 558 559 560 561 562 563 564 565
	if (!err && !quiet) {
		char samples[128];

		if (rec->samples)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
	}

566 567
out_delete_session:
	perf_session__delete(session);
568
	return status;
569
}
570

571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
589 590 591
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
592
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
593 594 595 596
	BRANCH_END
};

static int
597
parse_branch_stack(const struct option *opt, const char *str, int unset)
598 599 600 601 602 603 604 605
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
606
	char *s, *os = NULL, *p;
607 608
	int ret = -1;

609 610
	if (unset)
		return 0;
611

612 613 614 615
	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
616 617
		return -1;

618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}
639

640
			*mode |= br->mode;
641

642 643
			if (!p)
				break;
644

645 646
			s = p + 1;
		}
647 648 649
	}
	ret = 0;

650
	/* default to any branch */
651
	if ((*mode & ~ONLY_PLM) == 0) {
652
		*mode = PERF_SAMPLE_BRANCH_ANY;
653 654 655 656 657 658
	}
error:
	free(os);
	return ret;
}

659
static void callchain_debug(void)
J
Jiri Olsa 已提交
660
{
661
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
662

663
	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
664

665
	if (callchain_param.record_mode == CALLCHAIN_DWARF)
J
Jiri Olsa 已提交
666
		pr_debug("callchain: stack dump size %d\n",
667
			 callchain_param.dump_size);
J
Jiri Olsa 已提交
668 669
}

670
/*
 * Option callback for --call-graph / --no-call-graph.
 *
 * With @unset, callchains are disabled and the record mode is reset to
 * CALLCHAIN_NONE.  Otherwise callchains are enabled and @arg is handed to
 * parse_callchain_record_opt(), whose result is returned.
 */
int record_parse_callchain_opt(const struct option *opt __maybe_unused,
			       const char *arg,
			       int unset)
{
	int ret = 0;

	if (unset) {
		/* --no-call-graph */
		callchain_param.enabled = false;
		callchain_param.record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
	} else {
		callchain_param.enabled = true;

		ret = parse_callchain_record_opt(arg);
		if (ret == 0)
			callchain_debug();
	}

	return ret;
}

692
/*
 * Option callback for -g: enables callchain recording, choosing
 * CALLCHAIN_FP when no record mode has been selected yet.  Always
 * returns 0.
 */
int record_callchain_opt(const struct option *opt __maybe_unused,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	/* Keep a mode that was already selected. */
	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_param.enabled = true;

	callchain_debug();
	return 0;
}

705 706 707
/*
 * Config callback: "record.call-graph" is passed on under the name
 * "call-graph.record-mode"; every other variable goes to
 * perf_default_config() unchanged.
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	const char *key = var;

	if (strcmp(var, "record.call-graph") == 0)
		key = "call-graph.record-mode";

	return perf_default_config(key, value, cb);
}

713
/* Usage lines shown for 'perf record', NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Non-static handle on the usage lines. */
const char * const *record_usage = __record_usage;
719

720
/*
721 722
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
723 724 725 726 727 728 729
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
730
static struct record record = {
731
	.opts = {
732
		.sample_time	     = true,
733 734 735
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
736
		.freq		     = 4000,
N
Namhyung Kim 已提交
737 738
		.target		     = {
			.uses_mmap   = true,
739
			.default_per_cpu = true,
N
Namhyung Kim 已提交
740
		},
741
	},
742 743 744 745 746 747 748
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
	},
749
};
750

J
Jiri Olsa 已提交
751
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* The "dwarf" mode is only advertised when DWARF unwinding is built in. */
#ifdef HAVE_DWARF_UNWIND_SUPPORT
# define CALLCHAIN_MODES "fp dwarf lbr"
#else
# define CALLCHAIN_MODES "fp lbr"
#endif

/* Help text of the --call-graph option. */
const char record_callchain_help[] = CALLCHAIN_HELP CALLCHAIN_MODES;

759 760 761
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
762
 * from builtin-record.c, i.e. use record_opts,
763 764 765
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
766
struct option __record_options[] = {
767
	OPT_CALLBACK('e', "event", &record.evlist, "event",
768
		     "event selector. use 'perf list' to list available events",
769
		     parse_events_option),
770
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
L
Li Zefan 已提交
771
		     "event filter", parse_filter),
772
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
773
		    "record events on existing process id"),
774
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
775
		    "record events on existing thread id"),
776
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
777
		    "collect data with this RT SCHED_FIFO priority"),
778
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
779
		    "collect data without buffering"),
780
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
781
		    "collect raw sample records from all opened counters"),
782
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
783
			    "system-wide collection from all CPUs"),
784
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
785
		    "list of cpus to monitor"),
786
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
787
	OPT_STRING('o', "output", &record.file.path, "file",
I
Ingo Molnar 已提交
788
		    "output file name"),
789 790 791
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
792
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
793 794 795
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
796
	OPT_BOOLEAN(0, "group", &record.opts.group,
797
		    "put the counters into a counter group"),
J
Jiri Olsa 已提交
798 799 800 801 802 803
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
804
	OPT_INCR('v', "verbose", &verbose,
805
		    "be more verbose (show counter open errors, etc)"),
806
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
807
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
808
		    "per thread counts"),
809
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
810
		    "Sample addresses"),
811
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
812
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
813
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
814
		    "don't sample"),
815
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
816
		    "do not update the buildid cache"),
817
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
818
		    "do not collect buildids in perf.data"),
819
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
S
Stephane Eranian 已提交
820 821
		     "monitor event in cgroup name only",
		     parse_cgroups),
822
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
823
		  "ms to wait before starting measurement after program start"),
824 825
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),
826 827 828 829 830 831 832

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
833
		     parse_branch_stack),
834 835
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
836 837
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
838 839
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
840 841
	OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
		    "Sample machine registers on interrupt"),
842 843
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
844 845 846
	OPT_END()
};

847 848
struct option *record_options = __record_options;

849
/*
 * Entry point of 'perf record'.
 *
 * Creates the event list, reads the configuration and the command line,
 * validates the target, creates the cpu/thread maps and then hands over to
 * __cmd_record().  The event list and the symbol subsystem are released on
 * the way out.
 *
 * Returns the result of __cmd_record(), or a negative errno value when the
 * setup fails before recording starts.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct record *rec = &record;
	char errbuf[BUFSIZ];
	int err;

	rec->evlist = perf_evlist__new();
	if (!rec->evlist)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);

	/* Neither a command to run nor an existing target to attach to. */
	if (argc == 0 && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No event given on the command line: fall back to the default one. */
	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		err = -ENOMEM;
		goto out_symbol_exit;
	}

	/* With -t, do not inherit unless -i/--no-inherit was given explicitly. */
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* A validation problem is only warned about. */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, sizeof(errbuf));
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		/* Keep errno: the reporting calls below may change it. */
		const int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, sizeof(errbuf));
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(rec, argc, argv);

out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	return err;
}