// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/mmap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	perf_evlist__exit(&evlist->core);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__munmap(evlist);
	evlist__close(evlist);
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// If the event isn't in this evlist, there is no handler to set: just ignore it.
		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->core.system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask);
}

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i].core);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static void perf_mmap__unmap_cb(struct perf_mmap *map)
{
	struct mmap *m = container_of(map, struct mmap, core);

	mmap__munmap(m);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb);
	}

	return map;
}

static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
			 struct perf_mmap_param *_mp,
			 int idx, bool per_cpu)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap *maps = evlist->mmap;

	if (overwrite) {
		maps = evlist->overwrite_mmap;

		if (!maps) {
			maps = evlist__alloc_mmap(evlist, true);
			if (!maps)
				return NULL;

			evlist->overwrite_mmap = maps;
			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
				perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
		}
	}

	return &maps[idx].core;
}

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
			  int output, int cpu)
{
	struct mmap *map = container_of(_map, struct mmap, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	return mmap__mmap(map, mp, output, cpu);
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}
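
/*
 * A worked example of the calculation above (an illustration, not taken from
 * this file), assuming the common 4 KiB page_size and the kernel's usual
 * default perf_event_mlock_kb of 516 kB (512 kB plus one page):
 *
 *	max   = 516 - (4096 / 1024) = 512 kB
 *	pages = (512 * 1024) / 4096 = 128 pages, already a power of two
 *
 * so an unprivileged user gets 128 data pages per ring buffer (plus the
 * control page added by evlist__mmap_size() below) without exceeding the
 * mlock limit.
 */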

size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}
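
/*
 * For illustration (assuming a 4 KiB page_size), parse_pages_arg() accepts
 * either a size with a B/K/M/G suffix or a raw page count, e.g.:
 *
 *	"512K" -> 512 KiB / 4 KiB = 128 pages
 *	"100"  -> 100 pages, rounded up to the next power of two (128 pages),
 *		  with a "rounding mmap pages size" pr_info message
 *
 * Malformed strings and values above @max yield -EINVAL.
 */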

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = {
		.nr_cblocks	= nr_cblocks,
		.affinity	= affinity,
		.flush		= flush,
		.comp_level	= comp_level
	};
	struct perf_evlist_mmap_ops ops = {
		.idx  = perf_evlist__mmap_cb_idx,
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	mp.core.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
}

int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
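
/*
 * A rough usage sketch (not taken from this file; error handling omitted) of
 * how a tool typically combines the pieces above:
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	perf_evlist__add_default(evlist);		// or parse_events(...)
 *	perf_evlist__create_maps(evlist, &target);	// cpu/thread maps from the target
 *	evlist__open(evlist);				// sys_perf_event_open() per evsel
 *	evlist__mmap(evlist, UINT_MAX);			// ring buffers, UINT_MAX = mlock default
 *	evlist__enable(evlist);
 *	// consume events, e.g. as in the reading loop in perf_evlist__poll_thread()
 *	evlist__delete(evlist);				// munmaps, closes and frees everything
 */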

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, perf
	 * record overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false and, for
	 * perf record, thread_map__new_str() doesn't call
	 * thread_map__new_all_cpus(). That keeps perf record's current
	 * behavior.
	 *
	 * perf stat, on the other hand, allows target->per_thread and
	 * target->system_wide to both be true, which means collecting
	 * system-wide per-thread data. In that case thread_map__new_str()
	 * calls thread_map__new_all_cpus() to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel cpu maps should always be
		 * the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter = NULL;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}
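
/*
 * For example (pids are illustrative), asprintf__tp_filter_pids(2, pids) with
 * pids = { 123, 456 } builds the tracepoint filter string:
 *
 *	"common_pid != 123 && common_pid != 456"
 *
 * which the set/append helpers around here attach to every tracepoint evsel,
 * typically so a tool does not trace its own threads.
 */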

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__append_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
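
/*
 * A minimal sketch (hypothetical caller, error handling omitted) of the
 * workload "cork" protocol implemented by the two functions above:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	// child is forked, signals readiness, then blocks on read(go_pipe)
 *	evlist__open(evlist);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);	// write one byte -> child execvp()s
 *	// ... count/sample until the workload exits ...
 *
 * Closing workload.cork_fd without writing instead cancels the workload: the
 * child sees read() return 0 and simply exits.
 */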

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
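
/*
 * Summary of the legal bkw_mmap_state transitions handled above; any other
 * requested transition just falls through to state_err, and an unknown old
 * state triggers the WARN_ONCE:
 *
 *	NOTREADY     -> RUNNING       (no action; overwrite maps just became available)
 *	RUNNING      -> DATA_PENDING  (PAUSE the overwrite ring buffers)
 *	DATA_PENDING -> EMPTY         (no action; pending data has been collected)
 *	EMPTY        -> RUNNING       (RESUME the overwrite ring buffers)
 */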

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(&c2->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces, perf needs to call
	 * setns(2).  This isn't permitted if the fs_struct has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(&map->core))
				continue;
			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(&map->core);
				got_data = true;
			}
			perf_mmap__read_done(&map->core);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}
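
/*
 * A hedged usage sketch for the side-band machinery above (the attr, callback
 * and context names are illustrative, not from this file):
 *
 *	struct evlist *sb_evlist = NULL;
 *
 *	perf_evlist__add_sb_event(&sb_evlist, &attr, process_sb_event, &ctx);
 *	perf_evlist__start_sb_thread(sb_evlist, &target);
 *	// ... main command runs; the thread polls the side-band mmaps and
 *	// dispatches each event to its evsel->side_band.cb ...
 *	perf_evlist__stop_sb_thread(sb_evlist);
 */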