// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/mmap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

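/*
 * Convenience constructor: allocate a new evlist and add the default
 * event (cycles) to it.  Returns NULL if either the allocation or
 * perf_evlist__add_default() fails.
 */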
struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->core.pollfd);
}

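/*
 * Full teardown: undo the mmaps, close the events, drop the cpu/thread
 * maps, delete every evsel and finally free the evlist itself.
 * A NULL evlist is accepted and ignored.
 */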
void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}

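/*
 * Append an evsel to the evlist.  The first entry added becomes the
 * tracking event, and once the list gets its first entry the
 * id_pos/is_pos copies on the evlist are initialized from it.
 */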
void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

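/*
 * Associate handlers with tracepoint evsels by name.  A minimal usage
 * sketch (the tracepoint names and handler functions are illustrative):
 *
 *	static const struct evsel_str_handler handlers[] = {
 *		{ "sched:sched_switch", process_sched_switch },
 *		{ "sched:sched_wakeup", process_sched_wakeup },
 *	};
 *
 *	if (__evlist__set_tracepoints_handlers(evlist, handlers,
 *					       ARRAY_SIZE(handlers)) < 0)
 *		goto out_error;
 */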
int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// Adding a handler for an event not in this evlist, just ignore it.
		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->core.system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

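/*
 * Map a sample id back to the evsel that generated it, using the hash
 * table kept in evlist->core.heads.  With a single event, or when sample
 * ids are not in use, the lookup falls back to the first evsel.
 */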
struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i].core);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static void perf_mmap__unmap_cb(struct perf_mmap *map)
{
	struct mmap *m = container_of(map, struct mmap, core);

	mmap__munmap(m);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb);
	}

	return map;
}

static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
			 struct perf_mmap_param *_mp,
			 int idx, bool per_cpu)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap *maps = evlist->mmap;

	if (overwrite) {
		maps = evlist->overwrite_mmap;

		if (!maps) {
			maps = evlist__alloc_mmap(evlist, true);
			if (!maps)
				return NULL;

			evlist->overwrite_mmap = maps;
			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
				perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
		}
	}

	return &maps[idx].core;
}

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
			  int output, int cpu)
{
	struct mmap *map = container_of(_map, struct mmap, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	return mmap__mmap(map, mp, output, cpu);
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

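/*
 * Size of the ring buffer for a given page count: one extra page is
 * added for the control/header page, e.g. 128 data pages with 4 KiB
 * pages yield a 516 KiB mapping.  A page count that is neither UINT_MAX
 * (meaning "use the default") nor a power of two is rejected as 0.
 */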
size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

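/*
 * Parse a --mmap-pages style argument: either a plain page count or a
 * size with a B/K/M/G suffix, converted to pages and, when needed,
 * rounded up to the next power of two (e.g. "129" becomes 256 pages,
 * "512K" becomes 128 pages with 4 KiB pages).
 */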
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = {
		.nr_cblocks	= nr_cblocks,
		.affinity	= affinity,
		.flush		= flush,
		.comp_level	= comp_level
	};
	struct perf_evlist_mmap_ops ops = {
		.idx  = perf_evlist__mmap_cb_idx,
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	mp.core.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core);
}

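/*
 * Plain mmap with the defaults used by most tools: no auxtrace area, no
 * AIO cblocks, system-wide affinity, a flush threshold of 1 and no
 * compression.
 */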
int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are specified to perf record,
	 * perf record overrides '--per-thread': target->per_thread = false
	 * and target->system_wide = true.
	 *
	 * If only '--per-thread' is specified to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That keeps perf record's
	 * current behavior.
	 *
	 * perf stat allows both target->per_thread and target->system_wide
	 * to be true, meaning system-wide per-thread data is collected.
	 * In that case thread_map__new_str calls thread_map__new_all_cpus
	 * to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel maps should always be the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__append_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

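/*
 * Open all events in the evlist.  If no cpu/thread maps were set up,
 * system-wide maps are created first.  On failure every event that was
 * opened is closed again and errno is set from the error code.
 */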
int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

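/*
 * Releases the workload forked by perf_evlist__prepare_workload() by
 * writing one byte to the cork fd.  A typical calling sequence, as a
 * rough sketch with error handling omitted:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	evlist__open(evlist);
 *	evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 */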
int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(&evsel->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(&map->core))
				continue;
			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(&map->core);
				got_data = true;
			}
			perf_mmap__read_done(&map->core);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

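/*
 * Side-band event thread: perf_evlist__start_sb_thread() creates the maps,
 * opens, mmaps and enables the side-band events and then spawns
 * perf_evlist__poll_thread() to drain them; perf_evlist__stop_sb_thread()
 * flags it as done, joins it and deletes the evlist.
 */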
int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}