// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "asm/bug.h"
#include "perf.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
	char		 **filenames;
	int		 num_files;
	int		 cur_file;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;
	unsigned long		output_max_size;	/* = 0: unlimited */
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (rec->bytes_written >= rec->output_max_size);
}

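/*
 * Write a block of bytes to the perf.data file, accounting the volume in
 * rec->bytes_written so that the --max-size limit and the --switch-output
 * size trigger can be checked against it.
 */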
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
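/*
 * Queue one POSIX AIO write of @size bytes at file offset @off, retrying
 * while aio_write() keeps failing with EAGAIN.
 */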
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

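/*
 * Check whether one queued aio write has finished; restart it with the
 * remaining bytes on a short write, and drop the mmap reference once the
 * whole chunk has been written out.
 */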
static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

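/*
 * Wait for outstanding aio writes: with sync_all == false return the index
 * of the first free aio.data[] slot, with sync_all == true block until all
 * queued writes have completed (and then return -1).
 */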
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of the
	 * data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

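/*
 * Drain one mmap'ed ring buffer into a free aio.data[] slot (optionally
 * compressing it) and queue an asynchronous write of it to the perf.data
 * file at offset *off.
 */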
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
393
static int nr_cblocks_max = 4;
394 395

static int record__aio_parse(const struct option *opt,
396
			     const char *str,
397 398 399 400
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

401
	if (unset) {
402
		opts->nr_cblocks = 0;
403 404 405 406 407 408
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}
409 410 411 412

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
413 414
static int nr_cblocks_max = 0;

415
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
416
			    off_t *off __maybe_unused)
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
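/*
 * Parse the --mmap-flush option: accepts a plain number or a B/K/M/G
 * suffixed size, and caps the value at a quarter of the mmap buffer size.
 */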
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

467
	flush_max = evlist__mmap_size(opts->mmap_pages);
468 469 470 471 472 473 474
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
494 495
static unsigned int comp_level_max = 22;

496 497 498 499 500
static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

501
static int process_synthesized_event(struct perf_tool *tool,
502
				     union perf_event *event,
503 504
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
505
{
506
	struct record *rec = container_of(tool, struct record, tool);
507
	return record__write(rec, NULL, event, event->header.size);
508 509
}

510 511 512 513 514 515 516 517 518 519 520 521 522 523
static int process_locked_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}

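/*
 * perf_mmap__push() callback used in the non-AIO path: optionally compress
 * the chunk into map->data and append it to the perf.data file.
 */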
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
525 526 527
{
	struct record *rec = to;

528
	if (record__comp_enabled(rec)) {
529
		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
530 531 532
		bf   = map->data;
	}

533
	rec->samples++;
534
	return record__write(rec, map, bf, size);
535 536
}

537 538
static volatile int signr = -1;
static volatile int child_finished;
539

540 541 542 543 544 545 546 547 548 549
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

556 557 558 559 560 561 562 563 564
static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

565 566
#ifdef HAVE_AUXTRACE_SUPPORT

567
static int record__process_auxtrace(struct perf_tool *tool,
568
				    struct mmap *map,
569 570 571 572
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
573
	struct perf_data *data = &rec->data;
574 575 576
	size_t padding;
	u8 pad[8] = {0};

577
	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
578
		off_t file_offset;
579
		int fd = perf_data__fd(data);
580 581 582 583 584 585 586 587 588 589 590
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

591 592 593 594 595
	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

596 597
	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
598
	if (len2)
599 600
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);
601 602 603 604 605

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
606
				      struct mmap *map)
607 608 609
{
	int ret;

610
	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
611 612 613 614 615 616 617 618 619 620
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

621
static int record__auxtrace_mmap_read_snapshot(struct record *rec,
622
					       struct mmap *map)
623 624 625
{
	int ret;

626
	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

643
	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
644
		struct mmap *map = &rec->evlist->mmap[i];
645

646
		if (!map->auxtrace_mmap.base)
647 648
			continue;

649
		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
650 651 652 653 654 655 656 657
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

658
static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
659 660 661
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
662
		trigger_error(&auxtrace_snapshot_trigger);
663
	} else {
664
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
665 666 667
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
668 669 670
	}
}

671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

687 688 689 690 691 692 693 694 695 696 697 698 699 700 701
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

702 703 704 705 706
	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

707 708 709
	return auxtrace_parse_filters(rec->evlist);
}

710 711 712 713
#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
714
			       struct mmap *map __maybe_unused)
715 716 717 718
{
	return 0;
}

719
static inline
720 721
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
722
{
723 724
}

725 726
static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
727
{
728
	return 0;
729 730
}

731 732 733 734 735 736
static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

737 738 739 740 741
static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

742 743
#endif

744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

775
static int record__mmap_evlist(struct record *rec,
776
			       struct evlist *evlist)
777 778
{
	struct record_opts *opts = &rec->opts;
779 780
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
781 782
	char msg[512];

783 784 785
	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

786
	if (evlist__mmap_ex(evlist, opts->mmap_pages,
787
				 opts->auxtrace_mmap_pages,
788
				 auxtrace_overwrite,
789
				 opts->nr_cblocks, opts->affinity,
790
				 opts->mmap_flush, opts->comp_level) < 0) {
791 792 793 794 795 796 797 798 799 800
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
801
				str_error_r(errno, msg, sizeof(msg)));
802 803 804 805 806 807 808 809 810 811 812 813 814 815
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

816
static int record__open(struct record *rec)
817
{
818
	char msg[BUFSIZ];
819
	struct evsel *pos;
820
	struct evlist *evlist = rec->evlist;
821
	struct perf_session *session = rec->session;
822
	struct record_opts *opts = &rec->opts;
823
	int rc = 0;
824

825 826 827 828 829 830 831 832 833
	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

834
		pos = evlist__first(evlist);
835
		pos->tracking = 0;
836
		pos = evlist__last(evlist);
837
		pos->tracking = 1;
838
		pos->core.attr.enable_on_exec = 1;
839 840
	}

841
	perf_evlist__config(evlist, opts, &callchain_param);
842

843
	evlist__for_each_entry(evlist, pos) {
844
try_again:
845
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
846
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
847
				if (verbose > 0)
848
					ui__warning("%s\n", msg);
849 850
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
854
			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
857 858 859 860
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
861
			goto out;
		}
863 864

		pos->supported = true;
	}
866

867 868 869 870 871 872 873 874 875 876 877
	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

878
	if (perf_evlist__apply_filters(evlist, &pos)) {
879
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
880
			pos->filter, perf_evsel__name(pos), errno,
881
			str_error_r(errno, msg, sizeof(msg)));
882
		rc = -1;
883 884 885
		goto out;
	}

886 887
	rc = record__mmap(rec);
	if (rc)
888
		goto out;
889

890
	session->evlist = evlist;
891
	perf_session__set_id_hdr_size(session);
892 893
out:
	return rc;
894 895
}

896 897 898
static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
899
				struct evsel *evsel,
900 901 902 903
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

904 905 906 907
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;
908

909 910 911 912
	if (rec->buildid_all)
		return 0;

	rec->samples++;
913 914 915
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

916
static int process_buildids(struct record *rec)
917
{
918
	struct perf_session *session = rec->session;
919

920
	if (perf_data__size(&rec->data) == 0)
921 922
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
941 942
		rec->tool.sample = NULL;

943
	return perf_session__process_events(session);
944 945
}

946
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
947 948
{
	int err;
949
	struct perf_tool *tool = data;
950 951 952 953 954 955 956 957
	/*
	 *As for guest kernel when processing subcommand record&report,
	 *we arrange module mmap prior to guest kernel mmap and trigger
	 *a preload dso because default guest module symbols are loaded
	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 *method is used to avoid symbol missing when the first addr is
	 *in module instead of in guest kernel.
	 */
958
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
959
					     machine);
960 961
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
962
		       " relocation symbol.\n", machine->pid);
963 964 965 966 967

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
968
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
969
						 machine);
970 971
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
972
		       " relocation symbol.\n", machine->pid);
973 974
}

975 976 977 978 979
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

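/*
 * With --affinity=node or --affinity=cpu, migrate the recording thread onto
 * the CPUs associated with the mmap buffer that is about to be read.
 */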
static void record__adjust_affinity(struct record *rec, struct mmap *map)
981 982
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
983 984 985 986 987 988 989 990 991
	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
			  rec->affinity_mask.nbits)) {
		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
			  map->affinity_mask.bits, rec->affinity_mask.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
				  (cpu_set_t *)rec->affinity_mask.bits);
		if (verbose == 2)
			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
992 993 994
	}
}

995 996
static size_t process_comp_header(void *record, size_t increment)
{
997
	struct perf_record_compressed *event = record;
998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size)
{
	size_t compressed;
1015
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025

	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	session->bytes_transferred += src_size;
	session->bytes_compressed  += compressed;

	return compressed;
}

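/*
 * Read every mmap'ed ring buffer of the evlist (and its AUX areas) and write
 * the data out, via AIO when enabled; a FINISHED_ROUND event is emitted if
 * anything was written.
 */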
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1027
				    bool overwrite, bool synch)
1028
{
1029
	u64 bytes_written = rec->bytes_written;
1030
	int i;
1031
	int rc = 0;
1032
	struct mmap *maps;
1033
	int trace_fd = rec->data.file.fd;
1034
	off_t off = 0;
1035

1036 1037
	if (!evlist)
		return 0;
1038

1039
	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1040 1041 1042
	if (!maps)
		return 0;

1043
	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1044 1045
		return 0;

1046 1047 1048
	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

1049
	for (i = 0; i < evlist->core.nr_mmaps; i++) {
1050
		u64 flush = 0;
1051
		struct mmap *map = &maps[i];
1052

J
Jiri Olsa 已提交
1053
		if (map->core.base) {
1054
			record__adjust_affinity(rec, map);
1055
			if (synch) {
1056 1057
				flush = map->core.flush;
				map->core.flush = 1;
1058
			}
1059
			if (!record__aio_enabled(rec)) {
1060
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1061
					if (synch)
1062
						map->core.flush = flush;
1063 1064 1065 1066
					rc = -1;
					goto out;
				}
			} else {
1067
				if (record__aio_push(rec, map, &off) < 0) {
1068
					record__aio_set_pos(trace_fd, off);
1069
					if (synch)
1070
						map->core.flush = flush;
1071 1072 1073
					rc = -1;
					goto out;
				}
1074
			}
1075
			if (synch)
1076
				map->core.flush = flush;
1077
		}
1078

1079
		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1080
		    !rec->opts.auxtrace_sample_mode &&
1081
		    record__auxtrace_mmap_read(rec, map) != 0) {
1082 1083 1084
			rc = -1;
			goto out;
		}
1085 1086
	}

1087 1088 1089
	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

1090 1091 1092 1093 1094
	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
1095
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1096

1097
	if (overwrite)
1098
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1099 1100
out:
	return rc;
1101 1102
}

1103
static int record__mmap_read_all(struct record *rec, bool synch)
1104 1105 1106
{
	int err;

1107
	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1108 1109 1110
	if (err)
		return err;

1111
	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1112 1113
}

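/*
 * Start from all header features set and clear the ones that do not apply
 * to this recording session.
 */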
static void record__init_features(struct record *rec)
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

1125
	if (!have_tracepoints(&rec->evlist->core.entries))
1126 1127 1128 1129
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1130 1131 1132

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1133

1134 1135 1136
	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

1137
	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1138 1139
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1140

1141
	perf_header__clear_feat(&session->header, HEADER_STAT);
1142 1143
}

1144 1145 1146
static void
record__finish_output(struct record *rec)
{
1147 1148
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);
1149

1150
	if (data->is_pipe)
1151 1152 1153
		return;

	rec->session->header.data_size += rec->bytes_written;
1154
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

1167
static int record__synthesize_workload(struct record *rec, bool tail)
1168
{
1169
	int err;
1170
	struct perf_thread_map *thread_map;
1171

1172 1173 1174
	if (rec->opts.tail_synthesize != tail)
		return 0;

1175 1176 1177 1178 1179
	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1180 1181
						 process_synthesized_event,
						 &rec->session->machines.host,
1182
						 rec->opts.sample_address);
1183
	perf_thread_map__put(thread_map);
1184
	return err;
1185 1186
}

1187
static int record__synthesize(struct record *rec, bool tail);
1188

1189 1190 1191
static int
record__switch_output(struct record *rec, bool at_exit)
{
1192
	struct perf_data *data = &rec->data;
1193
	int fd, err;
1194
	char *new_filename;
1195 1196 1197 1198

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

1199 1200
	record__aio_mmap_read_sync(rec);

1201 1202 1203 1204
	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

1205 1206 1207 1208 1209 1210 1211 1212
	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

1213
	fd = perf_data__switch(data, timestamp,
1214
				    rec->session->header.data_offset,
1215
				    at_exit, &new_filename);
1216 1217 1218 1219 1220 1221 1222
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);
1224

1225 1226 1227 1228 1229 1230 1231 1232
	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
1233
			zfree(&rec->switch_output.filenames[n]);
1234 1235 1236 1237 1238 1239
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

1240
	/* Output tracking events */
1241
	if (!at_exit) {
1242
		record__synthesize(rec, false);
1243

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. This causes the newly created perf.data to
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
1254
			record__synthesize_workload(rec, false);
1255
	}
1256 1257 1258
	return fd;
}

1259 1260 1261 1262 1263 1264 1265
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal to true.
 */
1266 1267
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
1268 1269 1270 1271 1272 1273 1274
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

1275
static void snapshot_sig_handler(int sig);
1276
static void alarm_sig_handler(int sig);
1277

1278
static const struct perf_event_mmap_page *
1279
perf_evlist__pick_pc(struct evlist *evlist)
1280
{
1281
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
1286
	}
1287 1288 1289
	return NULL;
}

1290 1291
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
1292 1293 1294 1295 1296
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
1297 1298 1299
	return NULL;
}

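/*
 * Emit the synthetic events (attrs, tracing data, time conversion, kernel
 * and module maps, existing threads, ...) that the report side needs in
 * addition to the recorded samples.
 */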
static int record__synthesize(struct record *rec, bool tail)
1301 1302 1303
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
1304
	struct perf_data *data = &rec->data;
1305 1306
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
1307
	int fd = perf_data__fd(data);
1308
	int err = 0;
1309
	event_op f = process_synthesized_event;
1310

1311 1312 1313
	if (rec->opts.tail_synthesize != tail)
		return 0;

1314
	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
1319
		err = perf_event__synthesize_attrs(tool, rec->evlist,
1320 1321 1322 1323 1324 1325
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

1326 1327 1328 1329 1330 1331 1332
		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

1333
		if (have_tracepoints(&rec->evlist->core.entries)) {
1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

1352
	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1353 1354 1355 1356
					  process_synthesized_event, machine);
	if (err)
		goto out;

1357 1358 1359 1360 1361 1362 1363 1364 1365
	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

1366 1367 1368 1369 1370 1371 1372
	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385
	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}
1386 1387 1388 1389 1390 1391

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

1392 1393 1394 1395 1396 1397 1398
	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

1399
	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1400 1401 1402 1403 1404 1405 1406
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

1407
	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1408 1409 1410 1411 1412 1413
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

1414
	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1415 1416 1417 1418
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

1419 1420 1421 1422 1423
	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_warning("Couldn't synthesize cgroup events.\n");

1424 1425 1426 1427 1428
	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

1429
	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1430 1431 1432 1433 1434 1435
					    f, opts->sample_address,
					    rec->opts.nr_threads_synthesize);

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

1436 1437 1438 1439
out:
	return err;
}

1440
static int __cmd_record(struct record *rec, int argc, const char **argv)
1441
{
1442
	int err;
1443
	int status = 0;
1444
	unsigned long waking = 0;
1445
	const bool forks = argc > 0;
1446
	struct perf_tool *tool = &rec->tool;
1447
	struct record_opts *opts = &rec->opts;
1448
	struct perf_data *data = &rec->data;
1449
	struct perf_session *session;
1450
	bool disabled = false, draining = false;
1451
	int fd;
1452
	float ratio = 0;
1453

1454
	atexit(record__sig_exit);
1455 1456
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
1457
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);
1459

1460 1461 1462
	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

1463 1464 1465 1466 1467 1468 1469 1470 1471
	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

1472
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1473
		signal(SIGUSR2, snapshot_sig_handler);
1474 1475
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
1476
		if (rec->switch_output.enabled)
1477
			trigger_on(&switch_output_trigger);
1478
	} else {
1479
		signal(SIGUSR2, SIG_IGN);
1480
	}
1481

1482
	session = perf_session__new(data, false, tool);
1483
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
1485
		return PTR_ERR(session);
1486 1487
	}

1488
	fd = perf_data__fd(data);
1489 1490
	rec->session = session;

1491 1492 1493 1494 1495 1496 1497 1498
	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}

	session->header.env.comp_type  = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

1499 1500 1501 1502 1503 1504
	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

1505
	record__init_features(rec);
1506

1507 1508 1509
	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

1510
	if (forks) {
1511
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1512
						    argv, data->is_pipe,
1513
						    workload_exec_failed_signal);
1514 1515
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
1516
			status = err;
1517
			goto out_delete_session;
1518 1519 1520
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the id allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
1527
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

1530
	if (record__open(rec) != 0) {
1531
		err = -1;
1532
		goto out_child;
1533
	}
1534
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1535

1536 1537 1538 1539 1540 1541 1542 1543
	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_child;
		}
	}

1544 1545 1546 1547 1548 1549 1550 1551 1552 1553
	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

1554 1555 1556 1557 1558 1559 1560 1561 1562
	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

1563
	if (!rec->evlist->nr_groups)
1564 1565
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

1566
	if (data->is_pipe) {
1567
		err = perf_header__write_pipe(fd);
1568
		if (err < 0)
1569
			goto out_child;
1570
	} else {
1571
		err = perf_session__write_header(session, rec->evlist, fd, false);
1572
		if (err < 0)
1573
			goto out_child;
1574 1575
	}

1576
	err = -1;
1577
	if (!rec->no_buildid
1578
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1579
		pr_err("Couldn't generate buildids. "
1580
		       "Use --no-buildid to profile anyway.\n");
1581
		goto out_child;
1582 1583
	}

1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596
	if (!opts->no_bpf_event) {
		rec->sb_evlist = evlist__new();

		if (rec->sb_evlist == NULL) {
			pr_err("Couldn't create side band evlist.\n.");
			goto out_child;
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
			goto out_child;
		}
	}
1597

1598
	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1599 1600 1601 1602
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

1603
	err = record__synthesize(rec, false);
1604
	if (err < 0)
1605
		goto out_child;
1606

1607
	if (rec->realtime_prio) {
1608 1609
		struct sched_param param;

1610
		param.sched_priority = rec->realtime_prio;
1611
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1612
			pr_err("Could not set realtime priority.\n");
1613
			err = -1;
1614
			goto out_child;
1615 1616 1617
		}
	}

1618 1619 1620 1621 1622
	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
1623
	if (!target__none(&opts->target) && !opts->initial_delay)
1624
		evlist__enable(rec->evlist);
1625

1626 1627 1628
	/*
	 * Let the child rip
	 */
1629
	if (forks) {
1630
		struct machine *machine = &session->machines.host;
1631
		union perf_event *event;
1632
		pid_t tgid;
1633 1634 1635 1636 1637 1638 1639

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

1640 1641 1642 1643 1644 1645
		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
1670
		free(event);
1671

1672
		perf_evlist__start_workload(rec->evlist);
1673
	}
1674

1675
	if (opts->initial_delay) {
1676
		usleep(opts->initial_delay * USEC_PER_MSEC);
1677
		evlist__enable(rec->evlist);
1678 1679
	}

1680
	trigger_ready(&auxtrace_snapshot_trigger);
1681
	trigger_ready(&switch_output_trigger);
W
Wang Nan 已提交
1682
	perf_hooks__invoke_record_start();
1683
	for (;;) {
1684
		unsigned long long hits = rec->samples;
1685

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

1697
		if (record__mmap_read_all(rec, false) < 0) {
1698
			trigger_error(&auxtrace_snapshot_trigger);
1699
			trigger_error(&switch_output_trigger);
1700
			err = -1;
1701
			goto out_child;
1702
		}
1703

1704 1705
		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
1706
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1707
				record__read_auxtrace_snapshot(rec, false);
1708
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1709 1710 1711 1712 1713 1714
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

1715
		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
			 * it didn't collect data from the overwritable ring
			 * buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
1727 1728
			trigger_ready(&switch_output_trigger);

1729 1730 1731 1732 1733 1734 1735
			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746
			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
1747 1748 1749 1750

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
1751 1752
		}

1753
		if (hits == rec->samples) {
1754
			if (done || draining)
1755
				break;
1756
			err = evlist__poll(rec->evlist, -1);
1757 1758 1759 1760 1761
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
1762
				err = 0;
1763
			waking++;
1764

1765
			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1766
				draining = true;
1767 1768
		}

1769 1770 1771 1772 1773
		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
1774
		if (done && !disabled && !target__none(&opts->target)) {
1775
			trigger_off(&auxtrace_snapshot_trigger);
1776
			evlist__disable(rec->evlist);
1777 1778
			disabled = true;
		}
1779
	}
1780

1781
	trigger_off(&auxtrace_snapshot_trigger);
1782
	trigger_off(&switch_output_trigger);
1783

1784 1785 1786
	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

1787
	if (forks && workload_exec_errno) {
1788
		char msg[STRERR_BUFSIZE];
1789
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1790 1791
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
1792
		goto out_child;
1793 1794
	}

1795
	if (!quiet)
1796
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1797

1798 1799 1800
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

1801
out_child:
1802
	record__mmap_read_all(rec, true);
1803 1804
	record__aio_mmap_read_sync(rec);

1805 1806 1807 1808 1809
	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

1810 1811
	if (forks) {
		int exit_status;
1812

1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

1827
	record__synthesize(rec, true);
1828 1829 1830
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841
	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}
1842

	perf_hooks__invoke_record_end();

1845 1846
	if (!err && !quiet) {
		char samples[128];
1847 1848
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";
1849

1850
		if (rec->samples && !rec->opts.full_auxtrace)
1851 1852 1853 1854 1855
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

1856
		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
1857
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
1859 1860 1861 1862 1863 1864
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
1865 1866
	}

1867
out_delete_session:
1868
	zstd_fini(&session->zstd_data);
1869
	perf_session__delete(session);
1870 1871

	if (!opts->no_bpf_event)
1872
		perf_evlist__stop_sb_thread(rec->sb_evlist);
1873
	return status;
1874
}
1875

1876
static void callchain_debug(struct callchain_param *callchain)
{
1878
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1879

1880
	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1881

1882
	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
1884
			 callchain->dump_size);
}

1887 1888 1889
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
1892
	callchain->enabled = !unset;
1893

	/* --no-call-graph */
	if (unset) {
1896
		callchain->record_mode = CALLCHAIN_NONE;
J
Jiri Olsa 已提交
1897 1898 1899 1900
		pr_debug("callchain: disabled\n");
		return 0;
	}

1901
	ret = parse_callchain_record_opt(arg, callchain);
1902 1903
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
1904
		if (callchain->record_mode == CALLCHAIN_DWARF)
1905
			record->sample_address = true;
1906
		callchain_debug(callchain);
1907
	}
1908 1909 1910 1911

	return ret;
}
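
/*
 * Illustrative examples (not part of the original source) of the option
 * handled by the parser above:
 *
 *   perf record --call-graph fp ...          # frame-pointer unwinding
 *   perf record --call-graph dwarf,8192 ...  # DWARF unwind with 8 kB stack dumps
 *   perf record --call-graph lbr ...         # last branch records, where supported
 *
 * With "dwarf", sample_address is enabled above so the user stack can be
 * unwound at report time.
 */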

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}
#ifdef HAVE_AIO_SUPPORT
	if (!strcmp(var, "record.aio")) {
		rec->opts.nr_cblocks = strtol(value, NULL, 0);
		if (!rec->opts.nr_cblocks)
			rec->opts.nr_cblocks = nr_cblocks_default;
	}
#endif

	return 0;
}
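
/*
 * Illustrative ~/.perfconfig snippet handled by perf_record_config() above;
 * the keys are the ones matched in the code, the values are examples only:
 *
 *   [record]
 *           build-id = cache        # or "no-cache" / "skip"
 *           call-graph = dwarf      # forwarded as call-graph.record-mode
 *           aio = 4                 # honoured only with HAVE_AIO_SUPPORT
 */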

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
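
/*
 * Illustrative usage of the parser above (examples, not from the original
 * source):
 *
 *   perf record -k mono ...            # alias from the clockids[] table
 *   perf record -k CLOCK_BOOTTIME ...  # the "CLOCK_" prefix is stripped
 *   perf record -k 4 ...               # a raw clockid number is accepted too
 */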

static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}
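
/*
 * Illustrative usage: --affinity=node or --affinity=cpu; any other value
 * (including the default) leaves PERF_AFFINITY_SYS in place.
 */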

static int parse_output_max_size(const struct option *opt,
				 const char *str, int unset)
{
	unsigned long *s = (unsigned long *)opt->value;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (unset) {
		*s = 0;
		return 0;
	}

	val = parse_tag_value(str, tags_size);
	if (val != (unsigned long) -1) {
		*s = val;
		return 0;
	}

	return -1;
}
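
/*
 * Illustrative usage, per the tags_size[] table above: --max-size=2G,
 * --max-size=500K, or a plain byte count such as --max-size=1000000B.
 */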

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
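
/*
 * Illustrative usage of the "pages[,pages]" form parsed above:
 *
 *   perf record -m 512 ...      # 512 data pages per mmap
 *   perf record -m 512,128 ...  # plus 128 pages for the AUX area
 *
 * Sizes with a unit suffix (e.g. "16M") are also accepted by
 * __perf_evlist__parse_mmap_pages() and converted to pages.
 */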

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
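
/*
 * Illustrative --switch-output arguments matched above (examples only):
 *
 *   --switch-output          # defaults to "signal": rotate on SIGUSR2
 *   --switch-output=100M     # rotate after ~100 MB have been written
 *   --switch-output=30s      # rotate every 30 seconds
 *
 * Any of these implies --timestamp-filename, so each rotated file gets a
 * distinct name.
 */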

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
				  struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;
	return perf_event__process_mmap(tool, event, sample, machine);
}

static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
				   struct perf_sample *sample, struct machine *machine)
{
	/*
	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
	 * no need to add them twice.
	 */
	if (!(event->header.misc & PERF_RECORD_MISC_USER))
		return 0;

	return perf_event__process_mmap2(tool, event, sample, machine);
}

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
		    "Record cgroup events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_UINTEGER(0, "num-thread-synthesize",
		     &record.opts.nr_threads_synthesize,
		     "number of threads to run for event synthesis"),
	OPT_END()
};
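
/*
 * Illustrative invocation combining several of the options above; the
 * workload name and values are examples only:
 *
 *   perf record -F 99 -g --switch-output=1G -o perf.data -- ./my_workload
 */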

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	if (rec->opts.kcore)
		rec->data.is_dir = true;

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(sizeof(char *),
						      rec->switch_output.num_files);
		if (!rec->switch_output.filenames)
			return -EINVAL;
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
		rec->affinity_mask.nbits = cpu__max_cpu();
		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
		if (!rec->affinity_mask.bits) {
			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
			return -ENOMEM;
		}
		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
	}

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * requested, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	bitmap_free(rec->affinity_mask.bits);
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}
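
/*
 * Illustrative note (not part of the original source): with
 * --switch-output=signal the output is rotated when the recording process
 * receives SIGUSR2, e.g.:
 *
 *   kill -USR2 $(pgrep -x perf)
 *
 * With a time threshold, SIGALRM drives alarm_sig_handler() above, which
 * requests the rotation through the switch_output trigger instead.
 */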