builtin-record.c 28.3 KB
Newer Older
I
Ingo Molnar 已提交
1
/*
2 3 4 5 6
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
I
Ingo Molnar 已提交
7
 */
8 9
#define _FILE_OFFSET_BITS 64

10
#include "builtin.h"
11 12 13

#include "perf.h"

14
#include "util/build-id.h"
15
#include "util/util.h"
16
#include "util/parse-options.h"
17
#include "util/parse-events.h"
18

19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/tool.h"
26
#include "util/symbol.h"
27
#include "util/cpumap.h"
28
#include "util/thread_map.h"
29

30
#include <unistd.h>
31
#include <sched.h>
32
#include <sys/mman.h>
33

34 35 36 37 38
/* How an existing perf.data output file is treated (see __cmd_record()). */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite; a pre-existing file is renamed to <name>.old */
	WRITE_APPEND	/* append to an existing file (-A / --append) */
};

39
/*
 * State for one 'perf record' session, embedding the perf_tool callbacks
 * so tool pointers can be mapped back to it with container_of().
 */
struct perf_record {
	struct perf_tool	tool;		/* must stay first-class: container_of() target */
	struct perf_record_opts	opts;
	u64			bytes_written;	/* payload bytes written to 'output' so far */
	const char		*output_name;	/* output path, "-" or NULL => pipe/stdout */
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;	/* argv[0], used by psignal() in sig_exit */
	int			output;		/* output file descriptor */
	unsigned int		page_size;	/* sysconf(_SC_PAGE_SIZE), mmap header size */
	int			realtime_prio;	/* SCHED_FIFO priority, 0 = off */
	enum write_mode_t	write_mode;
	bool			no_buildid;	/* -B: don't collect buildids in perf.data */
	bool			no_buildid_cache; /* -N: don't update ~/.debug cache */
	bool			force;		/* -f (deprecated) */
	bool			file_new;	/* false when appending to an old file */
	bool			append_file;	/* -A */
	long			samples;	/* mmap reads done; used to detect idleness */
	off_t			post_processing_offset; /* where event data starts, for buildid pass */
};
59

60
/*
 * Account for 'size' bytes that were written to rec->output by somebody
 * else (e.g. perf_event__synthesize_tracing_data() writing directly to
 * the fd), so the header's data_size stays correct.
 */
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

65
static int write_output(struct perf_record *rec, void *buf, size_t size)
66 67
{
	while (size) {
68
		int ret = write(rec->output, buf, size);
69

70 71 72 73
		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}
74 75 76 77

		size -= ret;
		buf += ret;

78
		rec->bytes_written += ret;
79
	}
80 81

	return 0;
82 83
}

84
/*
 * perf_tool callback: forward a synthesized event straight into the
 * output file. The sample/machine arguments are unused here.
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);

	return write_output(rec, event, event->header.size) < 0 ? -1 : 0;
}

96
/*
 * Drain one mmap'ed ring buffer into the output file.
 *
 * 'head' is where the kernel has written up to, md->prev is where we
 * consumed up to last time. The data area starts one page past md->base
 * (the first page is the control header). Because the buffer is a power
 * of two ring (md->mask), a chunk that wraps around the end must be
 * written in two pieces.
 *
 * Returns 0 on success, -1 on write failure (md->prev/tail are then NOT
 * advanced, so the data is not lost from the ring's point of view).
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* nothing new */

	rec->samples++;

	size = head - old;

	/* Does the region wrap past the end of the ring? */
	if ((old & md->mask) + size != (head & md->mask)) {
		/* first piece: from 'old' to the physical end of the buffer */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* remaining (or only) piece, starting at the ring's beginning */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	/* tell the kernel the data was consumed so it can reuse the space */
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

/* Flags set from sig_handler(); volatile because they are polled from the
 * main record loop. */
static volatile int done = 0;		/* stop the record loop */
static volatile int signr = -1;		/* which signal asked us to stop */
static volatile int child_finished = 0;	/* SIGCHLD seen: workload exited */
143

144
/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1: remember which signal fired
 * and ask the main loop to wind down. Only touches volatile int flags,
 * which keeps it async-signal-safe.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

153
/*
 * on_exit() hook: reap (and if needed, terminate) the forked workload,
 * then, when we are exiting because of a signal, re-raise it with the
 * default disposition so our own exit status reflects the signal.
 * SIGUSR1 is treated as a benign "stop recording" request.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

174 175 176 177 178 179 180 181
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

182
	pair = perf_evlist__first(other);
183 184 185 186

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
			return false;
187
		pair = perf_evsel__next(pair);
188 189 190 191 192
	}

	return true;
}

193
/*
 * Open all counters in rec->evlist and mmap their ring buffers.
 *
 * Contains a chain of fallbacks for older kernels: retry without
 * exclude_guest/exclude_host, retry without sample_id_all, and fall back
 * from the 'cycles' hardware event to the cpu-clock software event when
 * no PMU support is available. On any unrecoverable error the negative
 * errno (or -1/-EINVAL) is returned; 0 on success.
 */
static int perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config_attrs(evlist, opts);

	if (opts->group)
		perf_evlist__set_leader(evlist);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		/* old kernel: the exclude_guest/host bits are unknown, clear them */
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				rc = -err;
				goto out;
			} else if (err ==  ENODEV && opts->target.cpu_list) {
				pr_err("No such device - did you specify"
				       " an out-of-range profile CPU?\n");
				rc = -err;
				goto out;
			} else if (err == EINVAL) {
				/* EINVAL may just mean an attr field this kernel
				 * doesn't know about; strip features and retry. */
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
					&& attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				/* drop the cached name so it is regenerated
				 * for the substituted event */
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				rc = -err;
				goto out;
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s) for event %s. /bin/dmesg may provide "
			      "additional information.\n",
			      err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE &&
			    err == EOPNOTSUPP) {
				pr_err("No hardware sampling interrupt available."
				       " No APIC? If so then you can boot the kernel"
				       " with the \"lapic\" boot parameter to"
				       " force-enable it.\n");
				rc = -err;
				goto out;
			}
#endif

			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			rc = -err;
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		/* appending: refuse if the events differ from the old file's */
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
 	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

354
static int process_buildids(struct perf_record *rec)
355
{
356
	u64 size = lseek(rec->output, 0, SEEK_CUR);
357

358 359 360
	if (size == 0)
		return 0;

361 362 363
	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
364 365 366
					      size, &build_id__mark_dso_hit_ops);
}

367
/*
 * on_exit() hook run on successful exit: finalize the perf.data header
 * (data size, build-ids) and tear down session/evlist state. Skipped
 * entirely in pipe mode, where there is no seekable header to rewrite,
 * and on non-zero exit status.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

387
/*
 * Per-machine callback: synthesize module and kernel mmap events for a
 * guest machine (host machines are skipped). Errors are only logged -
 * a guest we cannot describe should not abort the host record.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

423 424 425 426 427
/* Header-only marker event written after each full pass over the mmaps;
 * lets 'perf report' flush/reorder samples per round. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

428
static int perf_record__mmap_read_all(struct perf_record *rec)
429
{
430
	int i;
431
	int rc = 0;
432

433
	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
434 435 436 437 438 439
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
440 441
	}

442
	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
443 444 445 446 447
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
448 449
}

450
/*
 * The actual 'perf record' engine: set up the output file/pipe and the
 * perf session, fork the workload (when one was given on the command
 * line), open and mmap all counters, synthesize the pre-existing state
 * (kernel mmap, modules, threads), then loop draining the ring buffers
 * until a signal or workload exit sets 'done'.
 *
 * Returns 0 on success or a negative error; on the error paths reached
 * before on_exit(perf_record__exit, ...) is armed, the session is
 * deleted here directly.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;	/* leftover argv == workload to run */
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	/* NOTE(review): on_exit() is a GNU extension — not portable. */
	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/* No -o given: write to stdout when it is a pipe, else "perf.data". */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* keep one generation of the old file as <name>.old */
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			/* nothing to append to: silently fall back to a new file */
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* start with every header feature on, then prune the inapplicable ones */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* event data starts here; process_buildids() scans from this offset */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	/* pipe mode: the consumer can't read a header later, so synthesize
	 * attrs/event types/tracing data into the stream up front */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	/* main loop: drain mmaps; when nothing was read, block in poll()
	 * until the kernel signals data (or a signal sets 'done') */
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			/* NOTE(review): poll() result is ignored here;
			 * 'waking' only counts the wakeups */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/* stop producing, but loop once more to drain what's left */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
709

710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
/* Table mapping the -j/--branch-filter keywords to the kernel's
 * PERF_SAMPLE_BRANCH_* mask bits; NULL name terminates the table. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;	/* keyword accepted on the command line */
	int mode;		/* corresponding PERF_SAMPLE_BRANCH_* bit */
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

/*
 * Option callback for -b/-j: parse a comma-separated list of branch
 * filter keywords into the PERF_SAMPLE_BRANCH_* bitmask at opt->value.
 * A NULL 'str' (bare -b) is allowed. If only privilege-level bits
 * (u/k/hv) were given, "any" branch sampling is implied.
 * Returns 0 on success, -1 on an unknown keyword or a repeated option.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		/* walk the comma-separated tokens, NUL-terminating in place */
		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

794
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse the dwarf stack-dump size argument of '-g dwarf,<size>'.
 * The value is rounded up to a u64 multiple and must stay within
 * round_down(USHRT_MAX, sizeof(u64)). On success *_size is set and 0
 * returned; otherwise an error is printed and -1 returned.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
	char *endptr;
	unsigned long size = strtoul(str, &endptr, 0);

	/* reject trailing garbage after the number */
	if (*endptr == '\0') {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
821 822

/*
 * Option callback for -g/--call-graph: parse "fp" or (with libunwind)
 * "dwarf[,<dump size>]" into rec->opts.call_graph / stack_dump_size.
 * Returns 0 on success, -ENOMEM, or -1 on a malformed argument.
 */
static int
parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
		    int unset)
{
	struct perf_record *rec = (struct perf_record *)opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* We specified default option if none is provided. */
	BUG_ON(!arg);

	/* We need buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		/* note: sizeof("fp") includes the NUL, so this matches "fp" exactly */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				rec->opts.call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			rec->opts.call_graph = CALLCHAIN_DWARF;
			rec->opts.stack_dump_size = default_stack_dump_size;

			/* optional second token overrides the dump size */
			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				rec->opts.stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 rec->opts.stack_dump_size);
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", rec->opts.call_graph);

	return ret;
}

896
/* Usage strings shown by usage_with_options(); NULL-terminated. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		/* UINT_MAX/ULLONG_MAX sentinels mean "user did not set this";
		 * cmd_record() replaces them with real defaults */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,	/* default sampling frequency (Hz) */
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
925

926 927 928 929 930 931 932 933
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

934 935 936 937 938 939 940
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
/* Command-line option table for 'perf record'; results land in the global
 * 'record' instance above. Non-static on purpose: builtin-script.c uses it. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

1009
/*
 * Entry point for 'perf record': parse and validate options, resolve the
 * target (pid/tid/cpu/uid), install defaults (cycles event, frequency vs
 * period), then hand off to __cmd_record(). Returns 0 or a negative errno.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* need either a workload to fork or an explicit target */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* no -e given: fall back to the default event (cycles) */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* an invalid target combination is only a warning here... */
	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	/* ...but an unresolvable uid is fatal */
	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}