/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
8 9
#define _FILE_OFFSET_BITS 64

10
#include "builtin.h"
11 12 13

#include "perf.h"

14
#include "util/build-id.h"
15
#include "util/util.h"
16
#include "util/parse-options.h"
17
#include "util/parse-events.h"
18

19
#include "util/header.h"
20
#include "util/event.h"
21
#include "util/evlist.h"
22
#include "util/evsel.h"
23
#include "util/debug.h"
24
#include "util/session.h"
25
#include "util/symbol.h"
26
#include "util/cpumap.h"
27
#include "util/thread_map.h"
28

29
#include <unistd.h>
30
#include <sched.h>
31
#include <sys/mman.h>
32

33 34 35 36 37
/* How an existing perf.data file is treated. */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite (old file is renamed to *.old) */
	WRITE_APPEND	/* append to it (-A / --append) */
};

/* Recording options; defaults here, overridden by record_options[]. */
struct perf_record_opts record_opts = {
	.target_pid	     = -1,		/* -1: no specific pid (-p) */
	.target_tid	     = -1,		/* -1: no specific tid (-t) */
	.mmap_pages	     = UINT_MAX,	/* UINT_MAX: user didn't set -m */
	.user_freq	     = UINT_MAX,	/* UINT_MAX: user didn't set -F */
	.user_interval	     = ULLONG_MAX,	/* ULLONG_MAX: user didn't set -c */
	.freq		     = 1000,		/* default sampling frequency */
	.sample_id_all_avail = true,		/* cleared when the kernel is too old */
};

static unsigned int		page_size;
static int			output;				/* perf.data file descriptor */
static const char		*output_name			= NULL;
static int			realtime_prio			=      0;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

static long			samples				=      0;
static u64			bytes_written			=      0;	/* event payload bytes written */

static int			file_new			=      1;	/* 0 when appending to perf.data */
static off_t			post_processing_offset;		/* where event data starts in the file */

static struct perf_session	*session;
static const char               *progname;
65

66 67 68 69 70
/*
 * Account for @size bytes that a helper (e.g. the tracing-data
 * synthesizer) wrote to the output fd directly, so that the header's
 * data_size accounting in atexit_header() stays correct.
 */
static void advance_output(size_t size)
{
	bytes_written = bytes_written + size;
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/*
 * Append @size bytes from @buf to the output file, looping over short
 * writes and keeping the bytes_written accounting up to date. Dies on
 * a real write error. An EINTR return (this file installs handlers
 * for SIGCHLD/SIGINT/SIGUSR1, so writes can be interrupted) is simply
 * retried instead of being treated as fatal.
 */
static void write_output(void *buf, size_t size)
{
	while (size) {
		ssize_t ret = write(output, buf, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			die("failed to write");
		}

		size -= ret;
		buf += ret;

		bytes_written += ret;
	}
}

86
/*
 * Callback used while synthesizing metadata events (kernel/module
 * mmaps, threads, attrs, ...): append the event verbatim to the
 * output file. Always succeeds (write errors die() inside
 * write_output()).
 */
static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}

94
/*
 * Drain one mmap'ed ring buffer: copy everything between the last
 * consumed position (md->prev) and the current kernel head into the
 * output file, then publish the new tail so the kernel can reuse the
 * space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;	/* data area follows the control page */
	unsigned long size;
	void *buf;

	/* Nothing new since the last drain. */
	if (old == head)
		return;

	samples++;

	size = head - old;

	/*
	 * If the new data wraps around the end of the buffer, first
	 * write the chunk from 'old' up to the end of the buffer...
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	/* ...then write the remainder from the buffer start to head. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	/* Tell the kernel how far we have consumed. */
	perf_mmap__write_tail(md, old);
}

/* Set asynchronously by sig_handler() to stop the record loop. */
static volatile int done = 0;
/* Signal that stopped us; re-raised in sig_atexit(). -1: none. */
static volatile int signr = -1;
/* SIGCHLD seen: the forked workload has already exited. */
static volatile int child_finished = 0;

/* Common handler for SIGCHLD, SIGINT and SIGUSR1: just record and stop. */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

/*
 * atexit() hook: terminate and reap the forked workload (if any),
 * then re-raise the signal that stopped us with its default
 * disposition so our own exit status reflects it.
 */
static void sig_atexit(void)
{
	int status;

	if (evsel_list->workload.pid > 0) {
		/* Only kill the child if it hasn't already exited. */
		if (!child_finished)
			kill(evsel_list->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), progname);
	}

	/* Normal exit, or SIGUSR1 which is treated as a clean stop. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
/*
 * Check that two event lists describe the same events: same number of
 * entries and pairwise byte-identical perf_event_attrs. Used to
 * reject an append (-A) whose events don't match the ones already in
 * the perf.data header.
 */
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		/*
		 * Compare the whole attr. The previous code had the
		 * "!= 0" inside the sizeof(), so memcmp() got a length
		 * of 1 and only the first byte was ever compared.
		 */
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

179 180
/*
 * Open a perf counter for every evsel in @evlist on the configured
 * cpus/threads, with two retry fallbacks for older kernels (drop
 * sample_id_all on EINVAL; fall back from HW cycles to the sw
 * cpu-clock event), then set event filters and mmap the ring
 * buffers. Any unrecoverable failure exits the process via die() or
 * exit(); on success the session's evlist/sample type is updated.
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *first;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, &record_opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		/* Group members are opened against the leader's fds. */
		if (record_opts.group && pos != first)
			group_fd = first->fd;
retry_sample_id:
		attr->sample_id_all = record_opts.sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     record_opts.group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && record_opts.cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && record_opts.sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field:
				 * retry without it (and without PERF_SAMPLE_TIME
				 * unless something explicitly asked for it).
				 */
				record_opts.sample_id_all_avail = false;
				if (!record_opts.sample_time && !record_opts.raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			/* No fallback worked: report and give up. */
			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, record_opts.mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	/* When appending, the events must match the existing header. */
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

289 290 291 292
/*
 * Post-process the recorded data: walk every event between the start
 * of the data section (post_processing_offset) and the current end of
 * file, marking the DSOs that samples actually hit so only their
 * build-ids end up in the header.
 */
static int process_buildids(void)
{
	u64 file_size = lseek(output, 0, SEEK_CUR);

	if (!file_size)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      file_size - post_processing_offset,
					      file_size, &build_id__mark_dso_hit_ops);
}

302 303
/*
 * atexit() hook: finalize the perf.data file - fix up the data size,
 * collect build-ids, rewrite the header in place, then tear down the
 * session/evlist. Skipped entirely in pipe mode, where the header was
 * already streamed and cannot be rewritten.
 */
static void atexit_header(void)
{
	if (!record_opts.pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		/* Must happen before the session is deleted. */
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}

316
/*
 * perf_session__process_machines() callback: synthesize module and
 * kernel mmap events for one guest machine so its samples can be
 * resolved at report time. The host machine is skipped - it is
 * synthesized directly in __cmd_record(). @data is the perf_session.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

353 354 355 356 357 358 359
/* Marker event written after each full pass over the ring buffers. */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

/*
 * Drain every mmap'ed ring buffer once, then emit a FINISHED_ROUND
 * marker (only when tracing data is being recorded).
 */
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

371
/*
 * The record session proper: pick/open the output file (regular,
 * append, or pipe), create the session, write or stream the header,
 * synthesize the metadata events (attrs, tracing data, kernel and
 * module mmaps, threads), open the counters, start the workload (the
 * remaining argv, if any), then loop draining the ring buffers until
 * a signal sets 'done'. Returns 0 on success, negative on error; many
 * setup failures exit() directly instead.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;

	progname = argv[0];

	page_size = sysconf(_SC_PAGE_SIZE);

	/* Cleanup + stop-request handling; see sig_atexit()/sig_handler(). */
	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/*
	 * Output selection: stdout-is-a-fifo or "-o -" means pipe mode,
	 * otherwise default to "perf.data". An existing non-empty file
	 * is rotated to *.old when forcing; appending to an empty or
	 * missing file silently degrades to force mode.
	 */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			record_opts.pipe_output = true;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			record_opts.pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (record_opts.pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* Appending: pick up the events already in the file's header. */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* Environment metadata recorded in the header features. */
	perf_header__set_feat(&session->header, HEADER_HOSTNAME);
	perf_header__set_feat(&session->header, HEADER_OSRELEASE);
	perf_header__set_feat(&session->header, HEADER_ARCH);
	perf_header__set_feat(&session->header, HEADER_CPUDESC);
	perf_header__set_feat(&session->header, HEADER_NRCPUS);
	perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
	perf_header__set_feat(&session->header, HEADER_CMDLINE);
	perf_header__set_feat(&session->header, HEADER_VERSION);
	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
	perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
	perf_header__set_feat(&session->header, HEADER_CPUID);

	/* Fork the workload now, stopped; it is released further down. */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &record_opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (record_opts.pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	/* Everything past this offset is event data (see process_buildids()). */
	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/*
	 * Pipe mode: the consumer can't read a header later, so stream
	 * attrs, event types and tracing data as synthesized events.
	 */
	if (record_opts.pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			/* Written directly to the fd: account for it. */
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* Host kernel mmap; fall back to _stext when _text is missing. */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!record_opts.system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		/* Snapshot to detect whether this pass drained anything. */
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * Keep draining after a stop request so the final events
		 * are not lost, but disable the counters first.
		 */
		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
610 611

/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

617 618
/* -f and -A flags; reconciled into write_mode in cmd_record(). */
static bool force, append_file;

/* Command-line option table; most options write into record_opts. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &record_opts.target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &record_opts.target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record_opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record_opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record_opts.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record_opts.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record_opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record_opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record_opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record_opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record_opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record_opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record_opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record_opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record_opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &record_opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

675
/*
 * Entry point for 'perf record': parse options, validate their
 * combinations, build the evsel list and its cpu/thread maps,
 * reconcile frequency vs. period, then hand off to __cmd_record()
 * with the remaining argv as the workload to run.
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need something to record: a command, pid, tid, cpu list or -a. */
	if (!argc && record_opts.target_pid == -1 && record_opts.target_tid == -1 &&
		!record_opts.system_wide && !record_opts.cpu_list)
		usage_with_options(record_usage, record_options);

	/* -f and -A are mutually exclusive; -A wins over the default. */
	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !record_opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	/* -p without -t: profile that process's main thread. */
	if (record_opts.target_pid != -1)
		record_opts.target_tid = record_opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, record_opts.target_pid,
				     record_opts.target_tid, record_opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	/* Apply explicit -c/-F over the built-in defaults. */
	if (record_opts.user_interval != ULLONG_MAX)
		record_opts.default_interval = record_opts.user_interval;
	if (record_opts.user_freq != UINT_MAX)
		record_opts.freq = record_opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (record_opts.default_interval)
		record_opts.freq = 0;
	else if (record_opts.freq) {
		record_opts.default_interval = record_opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}