builtin-record.c 29.1 KB
Newer Older
I
Ingo Molnar 已提交
1
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
8
#include "builtin.h"
9 10 11

#include "perf.h"

12
#include "util/build-id.h"
13
#include "util/util.h"
14
#include "util/parse-options.h"
15
#include "util/parse-events.h"
16

17
#include "util/callchain.h"
18
#include "util/cgroup.h"
19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/tool.h"
26
#include "util/symbol.h"
27
#include "util/cpumap.h"
28
#include "util/thread_map.h"
29
#include "util/data.h"
30
#include "util/auxtrace.h"
31

32
#include <unistd.h>
33
#include <sched.h>
34
#include <sys/mman.h>
35

36

37
struct record {
38
	struct perf_tool	tool;
39
	struct record_opts	opts;
40
	u64			bytes_written;
41
	struct perf_data_file	file;
42
	struct auxtrace_record	*itr;
43 44 45 46 47 48 49
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
50
};
51

52
static int record__write(struct record *rec, void *bf, size_t size)
53
{
54
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
55 56
		pr_err("failed to write perf data, error: %m\n");
		return -1;
57
	}
58

59
	rec->bytes_written += size;
60
	return 0;
61 62
}

63
/*
 * Callback handed to the perf_event__synthesize_*() helpers: writes the
 * synthesized event, header.size bytes long, straight to the output.
 * The sample and machine arguments are not used.
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	return record__write(container_of(tool, struct record, tool),
			     event, event->header.size);
}

72
static int record__mmap_read(struct record *rec, int idx)
73
{
74
	struct perf_mmap *md = &rec->evlist->mmap[idx];
75 76
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
J
Jiri Olsa 已提交
77
	unsigned char *data = md->base + page_size;
78 79
	unsigned long size;
	void *buf;
80
	int rc = 0;
81

82
	if (old == head)
83
		return 0;
84

85
	rec->samples++;
86 87 88 89 90 91 92

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;
93

94
		if (record__write(rec, buf, size) < 0) {
95 96 97
			rc = -1;
			goto out;
		}
98 99 100 101 102
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;
103

104
	if (record__write(rec, buf, size) < 0) {
105 106 107
		rc = -1;
		goto out;
	}
108 109

	md->prev = old;
110
	perf_evlist__mmap_consume(rec->evlist, idx);
111 112
out:
	return rc;
113 114
}

115 116
#ifdef HAVE_AUXTRACE_SUPPORT

117 118 119 120 121
static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
122
	struct perf_data_file *file = &rec->file;
123 124 125
	size_t padding;
	u8 pad[8] = {0};

126 127 128 129 130 131 132 133 134 135 136 137 138 139
	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

/*
 * Drain one auxtrace mmap through record__process_auxtrace().
 *
 * A negative result from auxtrace_mmap__read() is passed on unchanged; a
 * positive one counts as activity (rec->samples is bumped).  Returns 0 in
 * every non-error case.
 */
static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int nread = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
					record__process_auxtrace);

	if (nread < 0)
		return nread;

	if (nread > 0)
		rec->samples++;

	return 0;
}

170 171 172 173 174 175 176 177 178 179 180
#else

/* Stand-in used when HAVE_AUXTRACE_SUPPORT is not defined: nothing to read. */
static inline int
record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			   struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

#endif

181
/* Flags shared between the signal handlers and the recording loop. */
static volatile int done = 0;		/* set by any handled signal */
static volatile int signr = -1;		/* last non-SIGCHLD signal, -1 if none */
static volatile int child_finished = 0;	/* set once SIGCHLD was seen */

/*
 * Common handler: SIGCHLD only marks the child as finished, every other
 * signal is remembered in signr.  Either way recording is asked to stop.
 */
static void sig_handler(int sig)
{
	switch (sig) {
	case SIGCHLD:
		child_finished = 1;
		break;
	default:
		signr = sig;
		break;
	}

	done = 1;
}

195
static void record__sig_exit(void)
196
{
197
	if (signr == -1)
198 199 200
		return;

	signal(signr, SIG_DFL);
201
	raise(signr);
202 203
}

204
static int record__open(struct record *rec)
205
{
206
	char msg[512];
207
	struct perf_evsel *pos;
208 209
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
210
	struct record_opts *opts = &rec->opts;
211
	int rc = 0;
212

213
	perf_evlist__config(evlist, opts);
214

215
	evlist__for_each(evlist, pos) {
216
try_again:
217
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
218
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
219
				if (verbose)
220
					ui__warning("%s\n", msg);
221 222
				goto try_again;
			}
223

224 225 226 227
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
228
			goto out;
L
Li Zefan 已提交
229 230
		}
	}
231

232 233 234
	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
235
			strerror_r(errno, msg, sizeof(msg)));
236 237
		rc = -1;
		goto out;
238 239
	}

240 241
	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages, false) < 0) {
242 243 244 245 246
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
247 248
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
249 250
			rc = -errno;
		} else {
251 252
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
253 254 255
			rc = -errno;
		}
		goto out;
256
	}
257

258
	session->evlist = evlist;
259
	perf_session__set_id_hdr_size(session);
260 261
out:
	return rc;
262 263
}

264 265 266 267 268 269 270 271 272 273 274 275 276
/*
 * Sample callback of the record tool: counts the sample in rec->samples
 * and hands it to build_id__mark_dso_hit(), whose result is returned.
 */
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	container_of(tool, struct record, tool)->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

277
static int process_buildids(struct record *rec)
278
{
279 280
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
281

282
	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
283 284 285
	if (size == 0)
		return 0;

286 287
	file->size = size;

288 289 290 291 292 293 294 295 296 297 298
	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

299
	return perf_session__process_events(session);
300 301
}

302
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
303 304
{
	int err;
305
	struct perf_tool *tool = data;
306 307 308 309 310 311 312 313
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
314
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
315
					     machine);
316 317
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
318
		       " relocation symbol.\n", machine->pid);
319 320 321 322 323

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
324
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
325
						 machine);
326 327
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
328
		       " relocation symbol.\n", machine->pid);
329 330
}

331 332 333 334 335
/* Header-only PERF_RECORD_FINISHED_ROUND event, written after each round of mmap reads that produced data. */
static struct perf_event_header finished_round_event = {
	.type = PERF_RECORD_FINISHED_ROUND,
	.size = sizeof(struct perf_event_header),
};

336
static int record__mmap_read_all(struct record *rec)
337
{
338
	u64 bytes_written = rec->bytes_written;
339
	int i;
340
	int rc = 0;
341

342
	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
343 344
		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;

345
		if (rec->evlist->mmap[i].base) {
346
			if (record__mmap_read(rec, i) != 0) {
347 348 349 350
				rc = -1;
				goto out;
			}
		}
351 352 353 354 355 356

		if (mm->base &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
357 358
	}

359 360 361 362 363 364
	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
365 366 367

out:
	return rc;
368 369
}

370
static void record__init_features(struct record *rec)
371 372 373 374 375 376 377 378 379 380
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

381
	if (!have_tracepoints(&rec->evlist->entries))
382 383 384 385
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
386 387 388

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
389 390
}

391 392 393 394 395 396 397
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
398 399
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
400 401 402 403 404 405 406
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

407
static int __cmd_record(struct record *rec, int argc, const char **argv)
408
{
409
	int err;
410
	int status = 0;
411
	unsigned long waking = 0;
412
	const bool forks = argc > 0;
413
	struct machine *machine;
414
	struct perf_tool *tool = &rec->tool;
415
	struct record_opts *opts = &rec->opts;
416
	struct perf_data_file *file = &rec->file;
417
	struct perf_session *session;
418
	bool disabled = false, draining = false;
419
	int fd;
420

421
	rec->progname = argv[0];
422

423
	atexit(record__sig_exit);
424 425
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
426
	signal(SIGTERM, sig_handler);
427

428
	session = perf_session__new(file, false, tool);
429
	if (session == NULL) {
A
Adrien BAK 已提交
430
		pr_err("Perf session creation failed.\n");
431 432 433
		return -1;
	}

434
	fd = perf_data_file__fd(file);
435 436
	rec->session = session;

437
	record__init_features(rec);
438

439
	if (forks) {
440
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
441
						    argv, file->is_pipe,
442
						    workload_exec_failed_signal);
443 444
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
445
			status = err;
446
			goto out_delete_session;
447 448 449
		}
	}

450
	if (record__open(rec) != 0) {
451
		err = -1;
452
		goto out_child;
453
	}
454

455
	if (!rec->evlist->nr_groups)
456 457
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

458
	if (file->is_pipe) {
459
		err = perf_header__write_pipe(fd);
460
		if (err < 0)
461
			goto out_child;
462
	} else {
463
		err = perf_session__write_header(session, rec->evlist, fd, false);
464
		if (err < 0)
465
			goto out_child;
466 467
	}

468
	if (!rec->no_buildid
469
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
470
		pr_err("Couldn't generate buildids. "
471
		       "Use --no-buildid to profile anyway.\n");
472
		err = -1;
473
		goto out_child;
474 475
	}

476
	machine = &session->machines.host;
477

478
	if (file->is_pipe) {
479
		err = perf_event__synthesize_attrs(tool, session,
480
						   process_synthesized_event);
481 482
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
483
			goto out_child;
484
		}
485

486
		if (have_tracepoints(&rec->evlist->entries)) {
487 488 489 490 491 492 493 494
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
495
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
496
								  process_synthesized_event);
497 498
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
499
				goto out_child;
500
			}
501
			rec->bytes_written += err;
502
		}
503 504
	}

505 506 507 508 509 510 511
	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out_delete_session;
	}

512
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
513
						 machine);
514 515 516 517
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");
518

519
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
520
					     machine);
521 522 523 524 525
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

526
	if (perf_guest) {
527 528
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
529
	}
530

531
	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
532
					    process_synthesized_event, opts->sample_address);
533
	if (err != 0)
534
		goto out_child;
535

536
	if (rec->realtime_prio) {
537 538
		struct sched_param param;

539
		param.sched_priority = rec->realtime_prio;
540
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
541
			pr_err("Could not set realtime priority.\n");
542
			err = -1;
543
			goto out_child;
544 545 546
		}
	}

547 548 549 550 551
	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
552
	if (!target__none(&opts->target) && !opts->initial_delay)
553
		perf_evlist__enable(rec->evlist);
554

555 556 557
	/*
	 * Let the child rip
	 */
558
	if (forks)
559
		perf_evlist__start_workload(rec->evlist);
560

561 562 563 564 565
	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

566
	for (;;) {
567
		int hits = rec->samples;
568

569
		if (record__mmap_read_all(rec) < 0) {
570
			err = -1;
571
			goto out_child;
572
		}
573

574
		if (hits == rec->samples) {
575
			if (done || draining)
576
				break;
577
			err = perf_evlist__poll(rec->evlist, -1);
578 579 580 581 582
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
583
				err = 0;
584
			waking++;
585 586 587

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
588 589
		}

590 591 592 593 594
		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
595
		if (done && !disabled && !target__none(&opts->target)) {
596
			perf_evlist__disable(rec->evlist);
597 598
			disabled = true;
		}
599 600
	}

601
	if (forks && workload_exec_errno) {
602
		char msg[STRERR_BUFSIZE];
603 604 605
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
606
		goto out_child;
607 608
	}

609
	if (!quiet)
610
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
611

612 613 614
out_child:
	if (forks) {
		int exit_status;
615

616 617 618 619 620 621 622 623 624 625 626 627 628 629
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

630 631 632
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

633 634 635
	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

636
		if (!rec->no_buildid) {
637
			process_buildids(rec);
638 639 640 641 642 643 644 645
			/*
			 * We take all buildids when the file contains
			 * AUX area tracing data because we do not decode the
			 * trace because it would take too long.
			 */
			if (rec->opts.full_auxtrace)
				dsos__hit_all(rec->session);
		}
646
		perf_session__write_header(rec->session, rec->evlist, fd, true);
647
	}
648

649 650 651
	if (!err && !quiet) {
		char samples[128];

652
		if (rec->samples && !rec->opts.full_auxtrace)
653 654 655 656 657 658 659 660 661 662
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
	}

663 664
out_delete_session:
	perf_session__delete(session);
665
	return status;
666
}
667

668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
686 687 688
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
689
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
690 691 692 693
	BRANCH_END
};

static int
694
parse_branch_stack(const struct option *opt, const char *str, int unset)
695 696 697 698 699 700 701 702
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
703
	char *s, *os = NULL, *p;
704 705
	int ret = -1;

706 707
	if (unset)
		return 0;
708

709 710 711 712
	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
713 714
		return -1;

715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735
	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}
736

737
			*mode |= br->mode;
738

739 740
			if (!p)
				break;
741

742 743
			s = p + 1;
		}
744 745 746
	}
	ret = 0;

747
	/* default to any branch */
748
	if ((*mode & ~ONLY_PLM) == 0) {
749
		*mode = PERF_SAMPLE_BRANCH_ANY;
750 751 752 753 754 755
	}
error:
	free(os);
	return ret;
}

756
static void callchain_debug(void)
J
Jiri Olsa 已提交
757
{
758
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
759

760
	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
761

762
	if (callchain_param.record_mode == CALLCHAIN_DWARF)
J
Jiri Olsa 已提交
763
		pr_debug("callchain: stack dump size %d\n",
764
			 callchain_param.dump_size);
J
Jiri Olsa 已提交
765 766
}

767
/*
 * Option callback for --call-graph.  When the option is negated the
 * callchain is disabled and the record mode reset to CALLCHAIN_NONE;
 * otherwise 'arg' is handed to parse_callchain_record_opt() and its
 * result is returned.
 */
int record_parse_callchain_opt(const struct option *opt __maybe_unused,
			       const char *arg,
			       int unset)
{
	int ret;

	callchain_param.enabled = !unset;

	if (!unset) {
		ret = parse_callchain_record_opt(arg);
		if (ret == 0)
			callchain_debug();

		return ret;
	}

	/* --no-call-graph */
	callchain_param.record_mode = CALLCHAIN_NONE;
	pr_debug("callchain: disabled\n");
	return 0;
}

789
/*
 * Option callback for the argument-less call-graph switch: enables
 * callchains and, if no record mode was chosen yet, selects CALLCHAIN_FP.
 * Always returns 0.
 */
int record_callchain_opt(const struct option *opt __maybe_unused,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_param.enabled = true;

	callchain_debug();
	return 0;
}

802 803 804
/*
 * Config callback: "record.call-graph" is treated as an alias of
 * "call-graph.record-mode"; everything is then passed on to
 * perf_default_config().
 */
static int perf_record_config(const char *var, const char *value, void *cb)
{
	const char *key = var;

	if (strcmp(var, "record.call-graph") == 0)
		key = "call-graph.record-mode";

	return perf_default_config(key, value, cb);
}

810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
/* Maps a clock name accepted on the command line to its clockid value. */
struct clockid_map {
	const char *name;
	int clockid;
};

/* Builds one table entry from a name and a clockid. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: lookups stop at the first entry with a NULL name. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/*
 * Names understood by parse_clockid(), which compares them ignoring case
 * and after stripping an optional "CLOCK_" prefix.
 */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

/*
 * Option callback for the clockid option.  opt->value points at the
 * record_opts to update.
 *
 * 'str' may be a number, or a name from clockids[] with an optional
 * "CLOCK_" prefix (case is ignored).  On success use_clockid is set and
 * clockid holds the selected clock.
 *
 * Returns 0 on success, when unsetting, or when no argument was given;
 * -1 when a clockid was already chosen or the name is unknown.
 */
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *name;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	name = strncasecmp(str, "CLOCK_", 6) ? str : str + 6;

	for (cm = clockids; cm->name; cm++) {
		if (strcasecmp(name, cm->name))
			continue;

		opts->clockid = cm->clockid;
		return 0;
	}

	/* Not found: undo the flag and report the argument as it was given. */
	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", str);
	return -1;
}

894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
/*
 * Option callback for "pages[,pages]".  opt->value points at the
 * record_opts to update.
 *
 * The part before the comma, when not empty, sets opts->mmap_pages; the
 * part after the comma, when a comma is present, sets
 * opts->auxtrace_mmap_pages.
 *
 * Returns 0 on success, -EINVAL without an argument, -ENOMEM when the
 * argument cannot be copied, otherwise the error of
 * __perf_evlist__parse_mmap_pages().
 */
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	unsigned int pages;
	char *copy, *comma;
	int ret = 0;

	if (!str)
		return -EINVAL;

	copy = strdup(str);
	if (!copy)
		return -ENOMEM;

	/* Split in place at the first comma, if any. */
	comma = strchr(copy, ',');
	if (comma)
		*comma = '\0';

	if (*copy) {
		ret = __perf_evlist__parse_mmap_pages(&pages, copy);
		if (ret)
			goto out_free;
		opts->mmap_pages = pages;
	}

	if (comma) {
		ret = __perf_evlist__parse_mmap_pages(&pages, comma + 1);
		if (!ret)
			opts->auxtrace_mmap_pages = pages;
	}

out_free:
	free(copy);
	return ret;
}

937
/* Usage lines of 'perf record', NULL terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};

/* Non-static handle on the usage lines. */
const char * const *record_usage = &__record_usage[0];
943

944
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
954
static struct record record = {
955
	.opts = {
956
		.sample_time	     = true,
957 958 959
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
960
		.freq		     = 4000,
N
Namhyung Kim 已提交
961 962
		.target		     = {
			.uses_mmap   = true,
963
			.default_per_cpu = true,
N
Namhyung Kim 已提交
964
		},
965
	},
966 967 968 969 970 971 972
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
	},
973
};
974

J
Jiri Olsa 已提交
975
/* Common prefix of the --call-graph help text; the supported modes are appended. */
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "

/* "dwarf" is only advertised when built with HAVE_DWARF_UNWIND_SUPPORT. */
#ifndef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
#endif

983 984 985
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
990
struct option __record_options[] = {
991
	OPT_CALLBACK('e', "event", &record.evlist, "event",
992
		     "event selector. use 'perf list' to list available events",
993
		     parse_events_option),
994
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
L
Li Zefan 已提交
995
		     "event filter", parse_filter),
996
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
997
		    "record events on existing process id"),
998
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
999
		    "record events on existing thread id"),
1000
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1001
		    "collect data with this RT SCHED_FIFO priority"),
1002
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1003
		    "collect data without buffering"),
1004
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1005
		    "collect raw sample records from all opened counters"),
1006
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1007
			    "system-wide collection from all CPUs"),
1008
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1009
		    "list of cpus to monitor"),
1010
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1011
	OPT_STRING('o', "output", &record.file.path, "file",
I
Ingo Molnar 已提交
1012
		    "output file name"),
1013 1014 1015
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
1016
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1017 1018 1019
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
1020
	OPT_BOOLEAN(0, "group", &record.opts.group,
1021
		    "put the counters into a counter group"),
J
Jiri Olsa 已提交
1022 1023 1024 1025 1026 1027
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
1028
	OPT_INCR('v', "verbose", &verbose,
1029
		    "be more verbose (show counter open errors, etc)"),
1030
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1031
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1032
		    "per thread counts"),
1033
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
1034
		    "Sample addresses"),
1035
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
1036
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
1037
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1038
		    "don't sample"),
1039
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
1040
		    "do not update the buildid cache"),
1041
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
1042
		    "do not collect buildids in perf.data"),
1043
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
S
Stephane Eranian 已提交
1044 1045
		     "monitor event in cgroup name only",
		     parse_cgroups),
1046
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1047
		  "ms to wait before starting measurement after program start"),
1048 1049
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),
1050 1051 1052 1053 1054 1055 1056

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
1057
		     parse_branch_stack),
1058 1059
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
1060 1061
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
1062 1063
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
1064 1065
	OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
		    "Sample machine registers on interrupt"),
1066 1067
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
1068 1069 1070
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
1071 1072 1073
	OPT_END()
};

1074 1075
struct option *record_options = __record_options;

1076
/*
 * Entry point of 'perf record': creates the evlist, reads the config,
 * parses and validates the command line, prepares the symbol and target
 * state and runs __cmd_record() on the static record instance.
 *
 * Returns the result of __cmd_record(), or a negative error when setup
 * fails before recording starts.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	struct record *rec = &record;
	char errbuf[BUFSIZ];
	int err;

	rec->evlist = perf_evlist__new();
	if (!rec->evlist)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);

	/* Neither a command to run nor an existing target to attach to. */
	if (argc == 0 && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	/* Default error for the failure paths below that do not set one. */
	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No event given on the command line: fall back to the default one. */
	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* With a tid target, inheritance is off unless the user chose explicitly. */
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* A validation problem is only warned about. */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, sizeof(errbuf));
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		/* Keep errno from being clobbered by the reporting calls. */
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, sizeof(errbuf));
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(rec, argc, argv);

out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}