/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef NO_LIBUNWIND_SUPPORT
static char callchain_help[] = CALLCHAIN_HELP "[fp]";
#else
static unsigned long default_stack_dump_size = 8192;
static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#endif

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

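/*
 * Account for bytes that were written to the output file by some other
 * path (e.g. tracing data synthesized directly into the fd), keeping
 * the header's data size accurate.
 */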
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

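/*
 * Write a buffer to the output file, looping because write(2) may
 * consume fewer bytes than requested.
 */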
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}

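/*
 * perf_tool callback: events synthesized in userspace (task, mmap and
 * kernel maps, tracing data, etc.) are appended straight to the output
 * file.
 */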
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}

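/*
 * Drain one mmap'ed ring buffer into the output file.  If the region
 * between 'old' and 'head' wraps past the end of the buffer, it is
 * written in two chunks: first the tail end, then the part that
 * wrapped around to the start.
 */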
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

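/*
 * Record which signal ended the session; SIGCHLD additionally means
 * the forked workload has exited.
 */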
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

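/*
 * on_exit() callback: terminate and reap a still-running workload,
 * then re-raise the fatal signal with its default disposition so our
 * exit status reflects it.
 */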
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

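/*
 * Used in append mode: the event attributes already recorded in the
 * perf.data header must match what we are about to record, entry by
 * entry.
 */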
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

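/*
 * Configure and open all counters in the evlist, falling back for
 * older kernels (no exclude_guest/exclude_host, no sample_id_all, no
 * hardware cycles event), then mmap the per-counter ring buffers.
 */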
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	perf_evlist__config_attrs(evlist, opts);

	if (opts->group)
		perf_evlist__group(evlist);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && opts->target.cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
					&& attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__set_id_hdr_size(session);
}

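/*
 * Re-read the events recorded so far to mark which DSOs were actually
 * hit, so that only their build-ids are written into the header.
 */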
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

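/*
 * on_exit() callback for the non-pipe case: account the bytes written,
 * collect build-ids and rewrite the header with the final sizes.
 */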
static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernels, when processing the record & report
	 * subcommands we arrange module mmaps prior to the guest kernel
	 * mmap and trigger a preload dso, because default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method is used to avoid missing
	 * symbols when the first addr is in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernels because a guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

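/*
 * Synthetic marker written after each sweep over the mmap buffers; it
 * tells consumers that all events preceding it have been flushed, so
 * buffered events up to this point can safely be reordered by time.
 */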
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

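/*
 * One sweep over every mmap'ed buffer, terminated by a
 * PERF_RECORD_FINISHED_ROUND marker when tracing data is present.
 */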
static void perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base)
			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		write_output(rec, &finished_round_event, sizeof(finished_round_event));
}

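/*
 * The bulk of the record command: set up the output file and session,
 * open the counters, synthesize pre-existing state (kernel and module
 * maps, threads), then loop draining the mmap buffers until the
 * workload exits or the user interrupts.
 */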
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

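/*
 * -b/-j option handling: map branch filter names to their
 * PERF_SAMPLE_BRANCH_* mask bits.
 */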
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

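/*
 * -g/--call-graph parsing: "fp" selects frame pointer based unwinding;
 * "dwarf[,size]" (only with libunwind support built in) records a user
 * stack dump of the given size for post-processing DWARF unwinding.
 */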
#ifndef NO_LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* !NO_LIBUNWIND_SUPPORT */

static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
		    int unset)
{
	struct perf_record *rec = (struct perf_record *)opt->value;
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* --no-call-graph */
	if (unset)
		return 0;

	/* We supply a default option value, so arg can never be NULL. */
	BUG_ON(!arg);

	/* We need a buffer we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				rec->opts.call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifndef NO_LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			ret = 0;
			rec->opts.call_graph = CALLCHAIN_DWARF;
			rec->opts.stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				rec->opts.stack_dump_size = size;
			}

			if (!ret)
				pr_debug("callchain: stack dump size %d\n",
					 rec->opts.stack_dump_size);
#endif /* !NO_LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown -g option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);

	if (!ret)
		pr_debug("callchain: type %d\n", rec->opts.call_graph);

	return ret;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
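
/*
 * Illustrative invocations (workload names are placeholders):
 *
 *   perf record -F 1000 -g -- ./my_workload   # 1 kHz sampling with call graphs
 *   perf record -a sleep 10                   # system-wide, for 10 seconds
 *   perf record -e cycles -p 1234             # attach to an existing pid
 */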

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new, because we need access to it in perf_record__exit,
 * which is called after cmd_record() exits; but since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

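/*
 * Entry point for 'perf record': parse and validate the options, build
 * the evsel list and cpu/thread maps, then hand off to __cmd_record().
 */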
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}