/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/auxtrace.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

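/* State for one invocation of the perf record command. */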
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

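/* Append @size bytes from @bf to the output file, accounting for them. */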
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

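/* Events synthesized in user space go straight to the output file. */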
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

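/*
 * Drain one mmap'ed ring buffer into the output file.  A chunk that
 * wraps around the end of the buffer is written in two pieces.
 */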
static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}

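/*
 * Write an AUX area trace event plus its data, padding the payload to the
 * 8-byte boundary that __auxtrace_mmap__read() accounted for in the size.
 */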
static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	size_t padding;
	u8 pad[8] = {0};

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

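/* atexit handler: re-raise the fatal signal with its default disposition. */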
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

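/*
 * Open all events on the configured CPUs/threads, apply event filters
 * and mmap the resulting ring buffers.
 */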
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

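/* Count the sample and mark the DSO it hit for build-id processing. */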
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
	if (size == 0)
		return 0;

	file->size = size;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange module mmaps prior to the guest kernel
	 * mmap and trigger a preload dso, because default guest module
	 * symbols are loaded from the guest kallsyms instead of
	 * /lib/modules/XXX/XXX.  This method is used to avoid missing
	 * symbols when the first address is in a module instead of in
	 * the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

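/* Drain every mmap'ed buffer, including any AUX area tracing buffers. */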
static int record__mmap_read_all(struct record *rec)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;

		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, i) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

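/* Start with all header features set, then clear those that don't apply. */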
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

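/*
 * The main recording loop: set up the session, synthesize the initial
 * events, then alternate between draining the mmaps and polling until
 * the workload exits or we are interrupted.
 */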
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out_delete_session;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_child;
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist, fd, true);
	}

	if (!err && !quiet) {
		char samples[128];

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
	BRANCH_END
};

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

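/* Log which call-graph recording mode ended up configured. */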
static void callchain_debug(void)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);

	if (callchain_param.record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain_param.dump_size);
}

int record_parse_callchain_opt(const struct option *opt __maybe_unused,
			       const char *arg,
			       int unset)
{
	int ret;

	callchain_param.enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain_param.record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg);
	if (!ret)
		callchain_debug();

	return ret;
}

int record_callchain_opt(const struct option *opt __maybe_unused,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	callchain_param.enabled = true;

	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_debug();
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

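/*
 * Parse -k/--clockid: accept a raw clockid number or one of the names
 * in the table above, with an optional "CLOCK_" prefix.
 */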
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

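/*
 * Parse -m/--mmap-pages "pages[,pages]": data mmap size, optionally
 * followed by the AUX area tracing mmap size.
 */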
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

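/*
 * Illustrative invocations (flags as defined in __record_options below):
 *   perf record -F 4000 -g -- ./workload    # 4 kHz sampling with call graphs
 *   perf record -a -e cycles -- sleep 5     # system-wide for five seconds
 */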
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
		    "Sample machine registers on interrupt"),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_END()
};

struct option *record_options = __record_options;

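/* Entry point for "perf record": parse options, validate the target, record. */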
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}