// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "util.h" // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

141
void evlist__exit(struct evlist *evlist)
142
{
143
	zfree(&evlist->mmap);
144
	zfree(&evlist->overwrite_mmap);
145
	fdarray__exit(&evlist->pollfd);
}

148
void evlist__delete(struct evlist *evlist)
149
{
	if (evlist == NULL)
		return;

153
	evlist__munmap(evlist);
154
	evlist__close(evlist);
155
	perf_cpu_map__put(evlist->core.cpus);
156
	perf_thread_map__put(evlist->core.threads);
157
	evlist->core.cpus = NULL;
158
	evlist->core.threads = NULL;
159
	evlist__purge(evlist);
160
	evlist__exit(evlist);
	free(evlist);
}

164
void evlist__add(struct evlist *evlist, struct evsel *entry)
165
{
166
	entry->evlist = evlist;
167
	entry->idx = evlist->core.nr_entries;
168
	entry->tracking = !entry->idx;
169

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
173
		perf_evlist__set_id_pos(evlist);
}

176
void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
179
	perf_evlist__remove(&evlist->core, &evsel->core);
}

182
void perf_evlist__splice_list_tail(struct evlist *evlist,
183
				   struct list_head *list)
184
{
185
	struct evsel *evsel, *temp;
186

187
	__evlist__for_each_entry_safe(list, temp, evsel) {
188
		list_del_init(&evsel->core.node);
189
		evlist__add(evlist, evsel);
190
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
195
	struct evsel *evsel, *leader;
196

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);
199

200
	leader->core.nr_members = evsel->idx - leader->idx + 1;
201

202
	__evlist__for_each_entry(list, evsel) {
203
		evsel->leader = leader;
	}
}
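/*
 * Illustrative example (not from the original source): for a list holding
 * evsels with idx 2, 3 and 4, the first entry (idx 2) becomes the leader of
 * all three and its core.nr_members is set to 4 - 2 + 1 = 3.
 */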

void perf_evlist__set_leader(struct evlist *evlist)
208
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
211
		__perf_evlist__set_leader(&evlist->core.entries);
212
	}
}

215
int __perf_evlist__add_default(struct evlist *evlist, bool precise)
216
{
217
	struct evsel *evsel = perf_evsel__new_cycles(precise);
218

219
	if (evsel == NULL)
220
		return -ENOMEM;
221

222
	evlist__add(evlist, evsel);
	return 0;
}
225

226
int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
233
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

238
	evlist__add(evlist, evsel);
	return 0;
}

242
static int evlist__add_attrs(struct evlist *evlist,
243
				  struct perf_event_attr *attrs, size_t nr_attrs)
244
{
245
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
250
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
253
		list_add_tail(&evsel->core.node, &head);
	}

256
	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
261
	__evlist__for_each_entry_safe(&head, n, evsel)
262
		evsel__delete(evsel);
	return -1;
}

266
int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

274
	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

277
struct evsel *
278
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
279
{
280
	struct evsel *evsel;
281

282
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

291
struct evsel *
292
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
295
	struct evsel *evsel;
296

297
	evlist__for_each_entry(evlist, evsel) {
298
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

306
int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
309
	struct evsel *evsel = perf_evsel__newtp(sys, name);
310

311
	if (IS_ERR(evsel))
		return -1;

314
	evsel->handler = handler;
315
	evlist__add(evlist, evsel);
	return 0;
}

319
static int perf_evlist__nr_threads(struct evlist *evlist,
320
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
325
		return perf_thread_map__nr(evlist->core.threads);
}

328
void evlist__disable(struct evlist *evlist)
329
{
330
	struct evsel *pos;
331

332
	evlist__for_each_entry(evlist, pos) {
333
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
334
			continue;
335
		evsel__disable(pos);
336
	}

	evlist->enabled = false;
}

341
void evlist__enable(struct evlist *evlist)
342
{
343
	struct evsel *pos;
344

345
	evlist__for_each_entry(evlist, pos) {
346
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
347
			continue;
348
		evsel__enable(pos);
349
	}

	evlist->enabled = true;
}

354
void perf_evlist__toggle_enable(struct evlist *evlist)
355
{
356
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

359
static int perf_evlist__enable_event_cpu(struct evlist *evlist,
360
					 struct evsel *evsel, int cpu)
361
{
362
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

365
	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
369
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

376
static int perf_evlist__enable_event_thread(struct evlist *evlist,
377
					    struct evsel *evsel,
					    int thread)
{
380
	int cpu;
381
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
382

383
	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
387
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

394
int perf_evlist__enable_event_idx(struct evlist *evlist,
395
				  struct evsel *evsel, int idx)
396
{
397
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

405
int perf_evlist__alloc_pollfd(struct evlist *evlist)
406
{
407
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
408
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
409
	int nfds = 0;
410
	struct evsel *evsel;
411

412
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}
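/*
 * Illustrative sizing example (hypothetical numbers): with a cpu map of 4
 * CPUs, a thread map of 3 threads and two evsels, one of them system_wide,
 * the loop above asks for 4 + 4 * 3 = 16 pollfd slots before growing the
 * fdarray.
 */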

426
static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
427
				     struct mmap *map, short revent)
428
{
429
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
435
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

443
int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
444
{
445
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
450
{
451
	struct mmap *map = fda->priv[fd].ptr;
452

	if (map)
		perf_mmap__put(map);
455
}
456

457
int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
458
{
459
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
460
			       perf_evlist__munmap_filtered, NULL);
}

463
int perf_evlist__poll(struct evlist *evlist, int timeout)
464
{
465
	return fdarray__poll(&evlist->pollfd, timeout);
}

468
static void perf_evlist__id_hash(struct evlist *evlist,
469
				 struct evsel *evsel,
470
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

481
void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

488
int perf_evlist__id_add_fd(struct evlist *evlist,
489
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
493
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */
505

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

513
	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

517
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
518
		++id_idx;
519
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

529
static void perf_evlist__set_sid_idx(struct evlist *evlist,
530
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
539
	if (!evsel->system_wide && evlist->core.threads && thread >= 0)
540
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

545
struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

554
	hlist_for_each_entry(sid, head, node)
555
		if (sid->id == id)
			return sid;

	return NULL;
}

561
struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

565
	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
573
		return perf_evlist__first(evlist);
574

	return NULL;
}
577

578
struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

593
static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
596
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

614
struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
616
{
617
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

623
	if (evlist->core.nr_entries == 1)
		return first;

626
	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
635
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

647
static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

651
	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
655
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

667
static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

672
static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

677
static void evlist__munmap_nofree(struct evlist *evlist)
678
{
679
	int i;
680

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
684

685
	if (evlist->overwrite_mmap)
686
		for (i = 0; i < evlist->nr_mmaps; i++)
687
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
688
}
689

690
void evlist__munmap(struct evlist *evlist)
691
{
692
	evlist__munmap_nofree(evlist);
693
	zfree(&evlist->mmap);
694
	zfree(&evlist->overwrite_mmap);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
699
{
	int i;
701
	struct mmap *map;

703
	evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
704
	if (perf_cpu_map__empty(evlist->core.cpus))
705
		evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads);
706
	map = zalloc(evlist->nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;
709

710
	for (i = 0; i < evlist->nr_mmaps; i++) {
711
		map[i].core.fd = -1;
712
		map[i].core.overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
715
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
722
		refcount_set(&map[i].core.refcnt, 0);
723
	}
724
	return map;
}

727
static bool
728
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
729
			 struct evsel *evsel)
730
{
731
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

736
static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
737
				       struct mmap_params *mp, int cpu_idx,
738
				       int thread, int *_output, int *_output_overwrite)
739
{
740
	struct evsel *evsel;
741
	int revent;
742
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);
743

744
	evlist__for_each_entry(evlist, evsel) {
745
		struct mmap *maps = evlist->mmap;
746
		int *output = _output;
747
		int fd;
748
		int cpu;
749

		mp->prot = PROT_READ | PROT_WRITE;
751
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
756
				maps = evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
759
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
762
			}
			mp->prot &= ~PROT_WRITE;
764
		}
765

		if (evsel->system_wide && thread)
			continue;

769
		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

773
		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
777

778
			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;
783

784
			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
799
			return -1;
800
		}
801

802
		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

814
static int evlist__mmap_per_cpu(struct evlist *evlist,
815
				     struct mmap_params *mp)
816
{
817
	int cpu, thread;
818
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
819
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
820

	pr_debug2("perf event ring buffer mmapped per cpu\n");
822
	for (cpu = 0; cpu < nr_cpus; cpu++) {
823
		int output = -1;
824
		int output_overwrite = -1;
825

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

829
		for (thread = 0; thread < nr_threads; thread++) {
830
			if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
831
							thread, &output, &output_overwrite))
832
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
839
	evlist__munmap_nofree(evlist);
	return -1;
}

843
static int evlist__mmap_per_thread(struct evlist *evlist,
844
					struct mmap_params *mp)
{
	int thread;
847
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
848

	pr_debug2("perf event ring buffer mmapped per thread\n");
850
	for (thread = 0; thread < nr_threads; thread++) {
851
		int output = -1;
852
		int output_overwrite = -1;
853

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

857
		if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
858
						&output, &output_overwrite))
859
			goto out_unmap;
	}

	return 0;

out_unmap:
865
	evlist__munmap_nofree(evlist);
	return -1;
}

869
unsigned long perf_event_mlock_kb_in_pages(void)
870
{
	unsigned long pages;
	int max;
873

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}
884

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

892
size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
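/*
 * Illustrative example (assuming a 4096-byte page_size): evlist__mmap_size(128)
 * returns (128 + 1) * 4096 = 528384 bytes, i.e. 128 data pages plus the extra
 * control/header page.  A page count that is not a power of two returns 0.
 */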

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
904
{
905
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
913

914
	if (str == NULL)
915
		return -EINVAL;
916

917
	val = parse_tag_value(str, tags);
918
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

929
	if (pages == 0 && min == 0) {
930
		/* leave number of pages at 0 */
931
	} else if (!is_power_of_2(pages)) {
		char buf[100];

934
		/* round pages up to next power of 2 */
935
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

950
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}
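/*
 * Illustrative examples of accepted --mmap-pages/-m values (hypothetical
 * invocations, not taken from this file): "-m 16" uses 16 pages, "-m 512K"
 * is converted from a byte size to a page count, and "-m 3" is rounded up to
 * the next power of two (4) with an informational message.
 */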

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

974
/**
975
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
981
 *
982
 * If @overwrite is %false the user needs to signal event consumption using
983
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
984
 * automatically.
985
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
989
 * Return: %0 on success, negative error code otherwise.
990
 */
991
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
992
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
995
{
996
	struct evsel *evsel;
997
	const struct perf_cpu_map *cpus = evlist->core.cpus;
998
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };
1006

1007
	if (!evlist->mmap)
1008
		evlist->mmap = evlist__alloc_mmap(evlist, false);
1009
	if (!evlist->mmap)
		return -ENOMEM;

1012
	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

1015
	evlist->mmap_len = evlist__mmap_size(pages);
1016
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
1017
	mp.mask = evlist->mmap_len - page_size - 1;
1018

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

1022
	evlist__for_each_entry(evlist, evsel) {
1023
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
1024
		    evsel->sample_id == NULL &&
1025
		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

1029
	if (perf_cpu_map__empty(cpus))
1030
		return evlist__mmap_per_thread(evlist, &mp);
1031

1032
	return evlist__mmap_per_cpu(evlist, &mp);
1033
}
1034

1035
int evlist__mmap(struct evlist *evlist, unsigned int pages)
1036
{
1037
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
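/*
 * Minimal usage sketch (illustrative only, error handling omitted): a
 * record-style caller typically pairs these helpers roughly as
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	parse_events(evlist, "cycles", NULL);
 *	evlist__open(evlist);
 *	evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	...consume events from the mmap'ed ring buffers...
 *	evlist__disable(evlist);
 *	evlist__munmap(evlist);
 *	evlist__close(evlist);
 *	evlist__delete(evlist);
 */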

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
1041
{
1042
	bool all_threads = (target->per_thread && target->system_wide);
1043
	struct perf_cpu_map *cpus;
1044
	struct perf_thread_map *threads;
1045

	/*
	 * If specify '-a' and '--per-thread' to perf record, perf record
	 * will override '--per-thread'. target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If specify '--per-thread' only to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That will keep perf record's
	 * current behavior.
	 *
	 * For perf stat, it allows the case that target->per_thread and
	 * target->system_wide are all true. It means to collect system-wide
	 * per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
1064
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
1065
				      all_threads);
1066

1067
	if (!threads)
		return -1;

1070
	if (target__uses_dummy_map(target))
1071
		cpus = perf_cpu_map__dummy_new();
1072
	else
1073
		cpus = perf_cpu_map__new(target->cpu_list);
1074

1075
	if (!cpus)
		goto out_delete_threads;

1078
	evlist->core.has_user_cpus = !!target->cpu_list;
1079

1080
	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
1085
	perf_thread_map__put(threads);
	return -1;
}

1089
void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
1092
	struct evsel *evsel;
1093

1094
	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

1098
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
1101
	struct evsel *evsel;
1102

1103
	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

1107
int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
1108
{
1109
	struct evsel *evsel;
1110
	int err = 0;
1111

1112
	evlist__for_each_entry(evlist, evsel) {
1113
		if (evsel->filter == NULL)
1114
			continue;
1115

		/*
		 * filters only work for tracepoint events, which don't have a cpu limit.
		 * So evlist and evsel should always be the same.
		 */
1120
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
1123
			break;
1124
		}
	}

	return err;
}

1130
int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
1131
{
1132
	struct evsel *evsel;
	int err = 0;

1135
	evlist__for_each_entry(evlist, evsel) {
1136
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

1139
		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
1145
}
1146

1147
int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;
1152

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}
1167

1168
	ret = perf_evlist__set_tp_filter(evlist, filter);
1169
out_free:
	free(filter);
	return ret;
}
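/*
 * Illustrative example: for hypothetical pids {123, 456} the filter string
 * built above is "common_pid != 123 && common_pid != 456", which is then
 * applied to every tracepoint evsel via perf_evlist__set_tp_filter().
 */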

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
1175
{
1176
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

1179
bool perf_evlist__valid_sample_type(struct evlist *evlist)
1180
{
1181
	struct evsel *pos;
1182

1183
	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

1189
	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
1192
			return false;
	}

1195
	return true;
}

1198
u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
1199
{
1200
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

1205
	evlist__for_each_entry(evlist, evsel)
1206
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

1211
u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

1217
u64 perf_evlist__combined_branch_type(struct evlist *evlist)
1218
{
1219
	struct evsel *evsel;
	u64 branch_type = 0;

1222
	evlist__for_each_entry(evlist, evsel)
1223
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

1227
bool perf_evlist__valid_read_format(struct evlist *evlist)
1228
{
1229
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;
1232

1233
	evlist__for_each_entry(evlist, pos) {
1234
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

1247
u64 perf_evlist__read_format(struct evlist *evlist)
1248
{
1249
	struct evsel *first = perf_evlist__first(evlist);
1250
	return first->core.attr.read_format;
}

1253
u16 perf_evlist__id_hdr_size(struct evlist *evlist)
1254
{
1255
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

1260
	if (!first->core.attr.sample_id_all)
		goto out;

1263
	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}
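/*
 * Illustrative example: with sample_id_all set and a sample_type of
 * PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_IDENTIFIER, the trailing
 * sample id block computed above is 8 + 8 + 8 = 24 bytes.
 */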

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
1287
{
1288
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
1289

1290
	evlist__for_each_entry_continue(evlist, pos) {
1291
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
1292
			return false;
	}

	return true;
}

1298
bool perf_evlist__sample_id_all(struct evlist *evlist)
1299
{
1300
	struct evsel *first = perf_evlist__first(evlist);
1301
	return first->core.attr.sample_id_all;
1302
}
1303

1304
void perf_evlist__set_selected(struct evlist *evlist,
1305
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}
1309

1310
void evlist__close(struct evlist *evlist)
1311
{
1312
	struct evsel *evsel;
1313

1314
	evlist__for_each_entry_reverse(evlist, evsel)
1315
		evsel__close(evsel);
}

1318
static int perf_evlist__create_syswide_maps(struct evlist *evlist)
1319
{
1320
	struct perf_cpu_map *cpus;
1321
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
1333
	cpus = perf_cpu_map__new(NULL);
1334
	if (!cpus)
		goto out;

1337
	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;
1340

1341
	perf_evlist__set_maps(&evlist->core, cpus, threads);
out:
	return err;
1344
out_put:
1345
	perf_cpu_map__put(cpus);
	goto out;
}

1349
int evlist__open(struct evlist *evlist)
1350
{
1351
	struct evsel *evsel;
1352
	int err;
1353

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
1358
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

1366
	evlist__for_each_entry(evlist, evsel) {
1367
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
1374
	evlist__close(evlist);
1375
	errno = -err;
	return err;
}
1378

1379
int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
1380
				  const char *argv[], bool pipe_output,
1381
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

1405
		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
1428
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

1441
		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

1460
	if (target__none(target)) {
1461
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
1466
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
1467
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

1479
	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

1493
int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
1496
		char bf = 0;
1497
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
1503
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
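/*
 * Minimal pairing sketch (illustrative only, error handling omitted): the
 * workload is forked early but stays corked until the events are set up, e.g.
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	evlist__open(evlist);
 *	evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);	// writes the single cork byte
 */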

1512
int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
1513
			      struct perf_sample *sample)
1514
{
1515
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
1519
	return perf_evsel__parse_sample(evsel, event, sample);
1520
}
1521

1522
int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
1526
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

1533
size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
1534
{
1535
	struct evsel *evsel;
	size_t printed = 0;

1538
	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

1543
	return printed + fprintf(fp, "\n");
1544
}
1545

1546
int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
1550
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

1559
		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
1568
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
1573
		break;
1574
	case EINVAL: {
1575
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

1581
		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
1588
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
1591
	default:
1592
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}
1599

1600
int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
1601
{
1602
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1603
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
1607
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
1610
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1611
				     "Hint:\tTried using %zd kB.\n",
1612
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

1631
void perf_evlist__to_front(struct evlist *evlist,
1632
			   struct evsel *move_evsel)
1633
{
1634
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

1640
	evlist__for_each_entry_safe(evlist, n, evsel) {
1641
		if (evsel->leader == move_evsel->leader)
1642
			list_move_tail(&evsel->core.node, &move);
	}

1645
	list_splice(&move, &evlist->core.entries);
1646
}
1647

1648
void perf_evlist__set_tracking_event(struct evlist *evlist,
1649
				     struct evsel *tracking_evsel)
1650
{
1651
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

1656
	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}
1663

1664
struct evsel *
1665
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
1668
	struct evsel *evsel;
1669

1670
	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}
1679

1680
void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

1690
	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
1696
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
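/*
 * Summary of the legal bkw_mmap_state transitions handled above (anything
 * else falls through to state_err and is ignored):
 *
 *	NOTREADY     -> RUNNING       (no ring buffer action)
 *	RUNNING      -> DATA_PENDING  (pauses the overwrite ring buffers)
 *	DATA_PENDING -> EMPTY         (no ring buffer action)
 *	EMPTY        -> RUNNING       (resumes the overwrite ring buffers)
 */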

1738
bool perf_evlist__exclude_kernel(struct evlist *evlist)
1739
{
1740
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
1743
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
1755
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
1758
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}
1764

1765
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
1766
						 struct evsel *evsel)
1767
{
1768
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
1773
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
1784
				evsel__close(c2);
1785
			c2->leader = c2;
1786
			c2->core.nr_members = 0;
		}
	}
	return leader;
}
1791

1792
int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
1797
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
1801
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

1810
	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
1816
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
1821
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
1829
	struct evlist *evlist = arg;
1830
	bool draining = false;
1831
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;
1843

1844
		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
1851
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
1857
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
1865
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

1876
int perf_evlist__start_sb_thread(struct evlist *evlist,
1877 1878
				 struct target *target)
{
1879
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
1888
		if (evsel__open(counter, evlist->core.cpus,
1889
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

1893
	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
1897
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
1908
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

1913
void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
1919
	evlist__delete(evlist);
1920
}