evlist.c 41.0 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
2 3 4 5 6 7
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
8
#include <api/fs/fs.h>
9
#include <errno.h>
10
#include <inttypes.h>
11
#include <poll.h>
12
#include "cpumap.h"
13
#include "util/mmap.h"
14
#include "thread_map.h"
15
#include "target.h"
16 17
#include "evlist.h"
#include "evsel.h"
A
Adrian Hunter 已提交
18
#include "debug.h"
19
#include "units.h"
20
#include <internal/lib.h> // page_size
21
#include "../perf.h"
22
#include "asm/bug.h"
23
#include "bpf-event.h"
24
#include <signal.h>
25
#include <unistd.h>
26
#include <sched.h>
27
#include <stdlib.h>
28

29
#include "parse-events.h"
30
#include <subcmd/parse-options.h>
31

32
#include <fcntl.h>
33
#include <sys/ioctl.h>
34 35
#include <sys/mman.h>

36 37
#include <linux/bitops.h>
#include <linux/hash.h>
38
#include <linux/log2.h>
39
#include <linux/err.h>
40
#include <linux/string.h>
41
#include <linux/zalloc.h>
42
#include <perf/evlist.h>
43
#include <perf/evsel.h>
44
#include <perf/cpumap.h>
45

46 47
#include <internal/xyarray.h>

48 49 50 51
#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

52
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
53
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
54

55 56
void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
57
{
58
	perf_evlist__init(&evlist->core);
59
	perf_evlist__set_maps(&evlist->core, cpus, threads);
60
	fdarray__init(&evlist->core.pollfd, 64);
61
	evlist->workload.pid = -1;
62
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
63 64
}

65
struct evlist *evlist__new(void)
66
{
67
	struct evlist *evlist = zalloc(sizeof(*evlist));
68

69
	if (evlist != NULL)
70
		evlist__init(evlist, NULL, NULL);
71 72 73 74

	return evlist;
}

75
struct evlist *perf_evlist__new_default(void)
76
{
77
	struct evlist *evlist = evlist__new();
78 79

	if (evlist && perf_evlist__add_default(evlist)) {
80
		evlist__delete(evlist);
81 82 83 84 85 86
		evlist = NULL;
	}

	return evlist;
}

87
struct evlist *perf_evlist__new_dummy(void)
88
{
89
	struct evlist *evlist = evlist__new();
90 91

	if (evlist && perf_evlist__add_dummy(evlist)) {
92
		evlist__delete(evlist);
93 94 95 96 97 98
		evlist = NULL;
	}

	return evlist;
}

99 100 101 102 103 104 105
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
106
void perf_evlist__set_id_pos(struct evlist *evlist)
107
{
108
	struct evsel *first = evlist__first(evlist);
109 110 111 112 113

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

114
static void perf_evlist__update_id_pos(struct evlist *evlist)
115
{
116
	struct evsel *evsel;
117

118
	evlist__for_each_entry(evlist, evsel)
119 120 121 122 123
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

124
static void evlist__purge(struct evlist *evlist)
125
{
126
	struct evsel *pos, *n;
127

128
	evlist__for_each_entry_safe(evlist, n, pos) {
129
		list_del_init(&pos->core.node);
130
		pos->evlist = NULL;
131
		evsel__delete(pos);
132 133
	}

134
	evlist->core.nr_entries = 0;
135 136
}

137
void evlist__exit(struct evlist *evlist)
138
{
139
	zfree(&evlist->mmap);
140
	zfree(&evlist->overwrite_mmap);
141
	fdarray__exit(&evlist->core.pollfd);
142 143
}

144
void evlist__delete(struct evlist *evlist)
145
{
146 147 148
	if (evlist == NULL)
		return;

149
	evlist__munmap(evlist);
150
	evlist__close(evlist);
151
	perf_cpu_map__put(evlist->core.cpus);
152
	perf_thread_map__put(evlist->core.threads);
153
	evlist->core.cpus = NULL;
154
	evlist->core.threads = NULL;
155
	evlist__purge(evlist);
156
	evlist__exit(evlist);
157 158 159
	free(evlist);
}

160
void evlist__add(struct evlist *evlist, struct evsel *entry)
161
{
162
	entry->evlist = evlist;
163
	entry->idx = evlist->core.nr_entries;
164
	entry->tracking = !entry->idx;
165

166 167 168
	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
169
		perf_evlist__set_id_pos(evlist);
170 171
}

172
void evlist__remove(struct evlist *evlist, struct evsel *evsel)
173 174
{
	evsel->evlist = NULL;
175
	perf_evlist__remove(&evlist->core, &evsel->core);
176 177
}

178
void perf_evlist__splice_list_tail(struct evlist *evlist,
179
				   struct list_head *list)
180
{
181
	struct evsel *evsel, *temp;
182

183
	__evlist__for_each_entry_safe(list, temp, evsel) {
184
		list_del_init(&evsel->core.node);
185
		evlist__add(evlist, evsel);
186
	}
187 188
}

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
/*
 * Attach handlers to the named tracepoints present in @evlist.
 * Names not found are silently skipped; an already-set handler is an error.
 * Returns 0 on success, -EEXIST if a tracepoint already had a handler.
 */
int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	size_t i;

	for (i = 0; i < nr_assocs; i++) {
		struct evsel *evsel =
			perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);

		/* Event not in this evlist: just ignore the association. */
		if (evsel == NULL)
			continue;

		if (evsel->handler != NULL)
			return -EEXIST;

		evsel->handler = assocs[i].handler;
	}

	return 0;
}

213 214
void __perf_evlist__set_leader(struct list_head *list)
{
215
	struct evsel *evsel, *leader;
216

217 218
	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);
219

220
	leader->core.nr_members = evsel->idx - leader->idx + 1;
221

222
	__evlist__for_each_entry(list, evsel) {
223
		evsel->leader = leader;
224 225 226
	}
}

227
void perf_evlist__set_leader(struct evlist *evlist)
228
{
229 230
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
231
		__perf_evlist__set_leader(&evlist->core.entries);
232
	}
233 234
}

235
/* Add the default cycles event; @precise requests precise-ip sampling. */
int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (!evsel)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}
245

246
int perf_evlist__add_dummy(struct evlist *evlist)
247 248 249 250 251 252
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
253
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);
254 255 256 257

	if (evsel == NULL)
		return -ENOMEM;

258
	evlist__add(evlist, evsel);
259 260 261
	return 0;
}

262
/*
 * Create one evsel per attr and splice them all onto @evlist.
 * On allocation failure the partially-built list is destroyed and -1
 * is returned; @evlist is left untouched in that case.
 */
static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	LIST_HEAD(new_entries);
	struct evsel *evsel, *tmp;
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &new_entries);
	}

	perf_evlist__splice_list_tail(evlist, &new_entries);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&new_entries, tmp, evsel)
		evsel__delete(evsel);
	return -1;
}

286
/* Initialize each attr with sane defaults, then add them all. */
int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

297
struct evsel *
298
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
299
{
300
	struct evsel *evsel;
301

302
	evlist__for_each_entry(evlist, evsel) {
303 304
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
305 306 307 308 309 310
			return evsel;
	}

	return NULL;
}

311
struct evsel *
312
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
313 314
				     const char *name)
{
315
	struct evsel *evsel;
316

317
	evlist__for_each_entry(evlist, evsel) {
318
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
319 320 321 322 323 324 325
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

326
int perf_evlist__add_newtp(struct evlist *evlist,
327 328
			   const char *sys, const char *name, void *handler)
{
329
	struct evsel *evsel = perf_evsel__newtp(sys, name);
330

331
	if (IS_ERR(evsel))
332 333
		return -1;

334
	evsel->handler = handler;
335
	evlist__add(evlist, evsel);
336 337 338
	return 0;
}

339
static int perf_evlist__nr_threads(struct evlist *evlist,
340
				   struct evsel *evsel)
341
{
342
	if (evsel->core.system_wide)
343 344
		return 1;
	else
345
		return perf_thread_map__nr(evlist->core.threads);
346 347
}

348
void evlist__disable(struct evlist *evlist)
349
{
350
	struct evsel *pos;
351

352
	evlist__for_each_entry(evlist, pos) {
353
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
354
			continue;
355
		evsel__disable(pos);
356
	}
357 358

	evlist->enabled = false;
359 360
}

361
void evlist__enable(struct evlist *evlist)
362
{
363
	struct evsel *pos;
364

365
	evlist__for_each_entry(evlist, pos) {
366
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
367
			continue;
368
		evsel__enable(pos);
369
	}
370 371 372 373

	evlist->enabled = true;
}

374
void perf_evlist__toggle_enable(struct evlist *evlist)
375
{
376
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
377 378
}

379
static int perf_evlist__enable_event_cpu(struct evlist *evlist,
380
					 struct evsel *evsel, int cpu)
381
{
382
	int thread;
383 384
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

385
	if (!evsel->core.fd)
386 387 388
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
389
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
390 391 392 393 394 395
		if (err)
			return err;
	}
	return 0;
}

396
static int perf_evlist__enable_event_thread(struct evlist *evlist,
397
					    struct evsel *evsel,
398 399
					    int thread)
{
400
	int cpu;
401
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
402

403
	if (!evsel->core.fd)
404 405 406
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
407
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
408 409 410 411 412 413
		if (err)
			return err;
	}
	return 0;
}

414
int perf_evlist__enable_event_idx(struct evlist *evlist,
415
				  struct evsel *evsel, int idx)
416
{
417
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);
418 419 420 421 422 423 424

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

425
int evlist__add_pollfd(struct evlist *evlist, int fd)
426
{
427
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
428 429
}

430 431
/* fdarray filter callback: drop the mmap reference tied to a dead fd. */
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct mmap *map = fda->priv[fd].ptr;

	if (map != NULL)
		perf_mmap__put(map);
}
438

439
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
440
{
441
	return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
442
			       perf_evlist__munmap_filtered, NULL);
443 444
}

445
int evlist__poll(struct evlist *evlist, int timeout)
446
{
447
	return perf_evlist__poll(&evlist->core, timeout);
448 449
}

450
static void perf_evlist__set_sid_idx(struct evlist *evlist,
451
				     struct evsel *evsel, int idx, int cpu,
A
Adrian Hunter 已提交
452 453 454 455
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
456 457
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
A
Adrian Hunter 已提交
458 459
	else
		sid->cpu = -1;
460
	if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
461
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
A
Adrian Hunter 已提交
462 463 464 465
	else
		sid->tid = -1;
}

466
/* Look up the sample id record for @id in the evlist hash table. */
struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	int hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	struct hlist_head *head = &evlist->core.heads[hash];
	struct perf_sample_id *sid;

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid;
	}

	return NULL;
}

482
/*
 * Map a sample @id back to its evsel.  With a single event, or a zero
 * (synthesized) id, the first event is the only candidate.
 */
struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (!id || evlist->core.nr_entries == 1)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid != NULL)
		return container_of(sid->evsel, struct evsel, core);

	/* Without sample_id_all, old files may lack ids: fall back. */
	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}
498

499
struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
500 501 502 503 504 505 506 507 508
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
509
		return container_of(sid->evsel, struct evsel, core);
510 511 512 513

	return NULL;
}

514
/*
 * Extract the sample id from a raw @event into *@id.
 *
 * For PERF_RECORD_SAMPLE the id sits at the evlist's id_pos counted from
 * the front of the sample array; for all other record types sample_id_all
 * data trails the record, so is_pos is counted back from the end.
 * Returns 0 on success, -1 if the record is too small to hold the id.
 */
static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	/* Number of u64 words in the record payload. */
	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;	/* index from the record tail */
		*id = array[n];
	}
	return 0;
}

535
struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
J
Jiri Olsa 已提交
536
					    union perf_event *event)
537
{
538
	struct evsel *first = evlist__first(evlist);
539 540 541 542 543
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

544
	if (evlist->core.nr_entries == 1)
545 546
		return first;

547
	if (!first->core.attr.sample_id_all &&
548 549
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;
550 551 552 553 554 555

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
556
		return first;
557 558

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
559
	head = &evlist->core.heads[hash];
560 561 562

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
563
			return container_of(sid->evsel, struct evsel, core);
564 565 566 567
	}
	return NULL;
}

568
static int perf_evlist__set_paused(struct evlist *evlist, bool value)
W
Wang Nan 已提交
569 570 571
{
	int i;

572
	if (!evlist->overwrite_mmap)
573 574
		return 0;

575
	for (i = 0; i < evlist->core.nr_mmaps; i++) {
576
		int fd = evlist->overwrite_mmap[i].core.fd;
W
Wang Nan 已提交
577 578 579 580 581 582 583 584 585 586 587
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

588
/* Pause output on all overwrite ring buffers. */
static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

593
/* Resume output on all overwrite ring buffers. */
static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

598
static void evlist__munmap_nofree(struct evlist *evlist)
599
{
600
	int i;
601

602
	if (evlist->mmap)
603
		for (i = 0; i < evlist->core.nr_mmaps; i++)
604
			perf_mmap__munmap(&evlist->mmap[i]);
605

606
	if (evlist->overwrite_mmap)
607
		for (i = 0; i < evlist->core.nr_mmaps; i++)
608
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
609
}
610

611
void evlist__munmap(struct evlist *evlist)
612
{
613
	evlist__munmap_nofree(evlist);
614
	zfree(&evlist->mmap);
615
	zfree(&evlist->overwrite_mmap);
616 617
}

618 619
static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
620
{
W
Wang Nan 已提交
621
	int i;
622
	struct mmap *map;
W
Wang Nan 已提交
623

624
	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
625
	if (perf_cpu_map__empty(evlist->core.cpus))
626 627
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
628 629
	if (!map)
		return NULL;
630

631
	for (i = 0; i < evlist->core.nr_mmaps; i++) {
632
		map[i].core.fd = -1;
633
		map[i].core.overwrite = overwrite;
634 635
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
636
		 * one extra to let perf_mmap__consume() get the last
637 638 639 640 641 642
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
643
		refcount_set(&map[i].core.refcnt, 0);
644
	}
645
	return map;
646 647
}

648
static bool
649
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
650
			 struct evsel *evsel)
651
{
652
	if (evsel->core.attr.write_backward)
653 654 655 656
		return false;
	return true;
}

657
/*
 * Map (or attach to) the ring buffer at position @idx for every evsel,
 * for one (cpu_idx, thread) pair.  The first evsel mapped at this
 * position creates the mmap and records its fd in *@_output (or
 * *@_output_overwrite for write_backward evsels); later evsels redirect
 * their output into it via PERF_EVENT_IOC_SET_OUTPUT and take a
 * reference.  Returns 0 on success, -1 on any failure.
 */
static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		/* mp->prot depends on each evsel's direction, so reset it. */
		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			/* Backward evsels go to the lazily-allocated overwrite maps. */
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			/* Kernel writes, we only read: drop PROT_WRITE. */
			mp->prot &= ~PROT_WRITE;
		}

		/* system_wide evsels are only opened on thread 0. */
		if (evsel->core.system_wide && thread)
			continue;

		/* Translate the evlist cpu to this evsel's own cpu map. */
		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			/* First evsel here: create the ring buffer. */
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			/* Subsequent evsels share the existing buffer. */
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->core.system_wide &&
		     perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

735
static int evlist__mmap_per_cpu(struct evlist *evlist,
736
				     struct mmap_params *mp)
737
{
738
	int cpu, thread;
739
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
740
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
741

A
Adrian Hunter 已提交
742
	pr_debug2("perf event ring buffer mmapped per cpu\n");
743
	for (cpu = 0; cpu < nr_cpus; cpu++) {
744
		int output = -1;
745
		int output_overwrite = -1;
746

747 748 749
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

750
		for (thread = 0; thread < nr_threads; thread++) {
751
			if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
752
							thread, &output, &output_overwrite))
753
				goto out_unmap;
754 755 756 757 758 759
		}
	}

	return 0;

out_unmap:
760
	evlist__munmap_nofree(evlist);
761 762 763
	return -1;
}

764
static int evlist__mmap_per_thread(struct evlist *evlist,
765
					struct mmap_params *mp)
766 767
{
	int thread;
768
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
769

A
Adrian Hunter 已提交
770
	pr_debug2("perf event ring buffer mmapped per thread\n");
771
	for (thread = 0; thread < nr_threads; thread++) {
772
		int output = -1;
773
		int output_overwrite = -1;
774

775 776 777
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

778
		if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
779
						&output, &output_overwrite))
780
			goto out_unmap;
781 782 783 784 785
	}

	return 0;

out_unmap:
786
	evlist__munmap_nofree(evlist);
787 788 789
	return -1;
}

790
unsigned long perf_event_mlock_kb_in_pages(void)
791
{
792 793
	unsigned long pages;
	int max;
794

795 796 797 798 799 800 801 802 803 804
	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}
805

806 807 808 809 810 811 812
	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

813
size_t evlist__mmap_size(unsigned long pages)
814 815 816 817
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
818 819 820 821 822
		return 0;

	return (pages + 1) * page_size;
}

823 824
/*
 * Parse a user-supplied mmap size: either a byte size with a B/K/M/G
 * suffix (converted to pages) or a bare page count.  Non-power-of-two
 * counts are rounded up (with a notice), unless zero pages is explicitly
 * allowed by @min == 0.  Returns the page count, or a negative errno on
 * bad input or a result above @max.
 */
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

871
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
872 873 874 875
{
	unsigned long max = UINT_MAX;
	long pages;

A
Adrian Hunter 已提交
876
	if (max > SIZE_MAX / page_size)
877 878 879 880 881
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
882 883 884 885 886 887 888
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

889 890 891 892 893 894
/* libsubcmd option callback wrapper for --mmap_pages. */
int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	unsigned int *pages = opt->value;

	return __perf_evlist__parse_mmap_pages(pages, str);
}

895
/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 * @nr_cblocks: number of AIO control blocks per map (0 disables AIO)
 * @affinity: CPU affinity policy used when reading the maps
 * @flush: minimum number of bytes before a flush
 * @comp_level: compression level for the mmap'ed data (0 = none)
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	/* Allocate the per-cpu/per-thread mmap array lazily. */
	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	/* Ring-buffer index mask: data area only (length minus header page). */
	mp.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	/* Events that report IDs need per-fd sample-id storage. */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->core.sample_id == NULL &&
		    perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	/* No cpu map means we monitor threads wherever they run. */
	if (perf_cpu_map__empty(cpus))
		return evlist__mmap_per_thread(evlist, &mp);

	return evlist__mmap_per_cpu(evlist, &mp);
}
955

956
int evlist__mmap(struct evlist *evlist, unsigned int pages)
957
{
958
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
959 960
}

961
int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
962
{
963
	bool all_threads = (target->per_thread && target->system_wide);
964
	struct perf_cpu_map *cpus;
965
	struct perf_thread_map *threads;
966

967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
	/*
	 * If specify '-a' and '--per-thread' to perf record, perf record
	 * will override '--per-thread'. target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If specify '--per-thread' only to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That will keep perf record's
	 * current behavior.
	 *
	 * For perf stat, it allows the case that target->per_thread and
	 * target->system_wide are all true. It means to collect system-wide
	 * per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
985
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
986
				      all_threads);
987

988
	if (!threads)
989 990
		return -1;

991
	if (target__uses_dummy_map(target))
992
		cpus = perf_cpu_map__dummy_new();
993
	else
994
		cpus = perf_cpu_map__new(target->cpu_list);
995

996
	if (!cpus)
997 998
		goto out_delete_threads;

999
	evlist->core.has_user_cpus = !!target->cpu_list;
1000

1001
	perf_evlist__set_maps(&evlist->core, cpus, threads);
1002 1003

	return 0;
1004 1005

out_delete_threads:
1006
	perf_thread_map__put(threads);
1007 1008 1009
	return -1;
}

1010
void __perf_evlist__set_sample_bit(struct evlist *evlist,
1011 1012
				   enum perf_event_sample_format bit)
{
1013
	struct evsel *evsel;
1014

1015
	evlist__for_each_entry(evlist, evsel)
1016 1017 1018
		__perf_evsel__set_sample_bit(evsel, bit);
}

1019
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
1020 1021
				     enum perf_event_sample_format bit)
{
1022
	struct evsel *evsel;
1023

1024
	evlist__for_each_entry(evlist, evsel)
1025 1026 1027
		__perf_evsel__reset_sample_bit(evsel, bit);
}

1028
int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
1029
{
1030
	struct evsel *evsel;
1031
	int err = 0;
1032

1033
	evlist__for_each_entry(evlist, evsel) {
1034
		if (evsel->filter == NULL)
1035
			continue;
1036

1037 1038 1039 1040
		/*
		 * filters only work for tracepoint event, which doesn't have cpu limit.
		 * So evlist and evsel should always be same.
		 */
1041
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
1042 1043
		if (err) {
			*err_evsel = evsel;
1044
			break;
1045
		}
1046 1047
	}

1048 1049 1050
	return err;
}

1051
int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
1052
{
1053
	struct evsel *evsel;
1054 1055
	int err = 0;

1056 1057 1058
	if (filter == NULL)
		return -1;

1059
	evlist__for_each_entry(evlist, evsel) {
1060
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
1061 1062
			continue;

1063
		err = perf_evsel__set_filter(evsel, filter);
1064 1065 1066 1067 1068
		if (err)
			break;
	}

	return err;
1069
}
1070

1071
/*
 * Build a "common_pid != A && common_pid != B ..." filter string for
 * @npids pids.  Returns a malloc'ed string the caller must free, or
 * NULL on allocation failure or when @npids is zero.
 *
 * Fix: @filter was previously uninitialized, so for npids == 0 the
 * function returned an indeterminate pointer (undefined behavior) which
 * the caller then passed to free().
 */
static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter = NULL;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;

out_free:
	free(filter);
	return NULL;
}

/* Filter out @npids pids from every tracepoint evsel in @evlist. */
int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int err = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);	/* set_tp_filter copied it (or filter is NULL) */
	return err;
}

1106
/* Single-pid convenience wrapper around set_tp_filter_pids(). */
int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

1111
bool perf_evlist__valid_sample_type(struct evlist *evlist)
1112
{
1113
	struct evsel *pos;
1114

1115
	if (evlist->core.nr_entries == 1)
1116 1117 1118 1119 1120
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

1121
	evlist__for_each_entry(evlist, pos) {
1122 1123
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
1124
			return false;
1125 1126
	}

1127
	return true;
1128 1129
}

1130
u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
1131
{
1132
	struct evsel *evsel;
1133 1134 1135 1136

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

1137
	evlist__for_each_entry(evlist, evsel)
1138
		evlist->combined_sample_type |= evsel->core.attr.sample_type;
1139 1140 1141 1142

	return evlist->combined_sample_type;
}

1143
u64 perf_evlist__combined_sample_type(struct evlist *evlist)
1144 1145 1146
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
1147 1148
}

1149
u64 perf_evlist__combined_branch_type(struct evlist *evlist)
1150
{
1151
	struct evsel *evsel;
1152 1153
	u64 branch_type = 0;

1154
	evlist__for_each_entry(evlist, evsel)
1155
		branch_type |= evsel->core.attr.branch_sample_type;
1156 1157 1158
	return branch_type;
}

1159
bool perf_evlist__valid_read_format(struct evlist *evlist)
1160
{
1161
	struct evsel *first = evlist__first(evlist), *pos = first;
1162 1163
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;
1164

1165
	evlist__for_each_entry(evlist, pos) {
1166
		if (read_format != pos->core.attr.read_format)
1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178
			return false;
	}

	/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

1179
u16 perf_evlist__id_hdr_size(struct evlist *evlist)
1180
{
1181
	struct evsel *first = evlist__first(evlist);
1182 1183 1184 1185
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

1186
	if (!first->core.attr.sample_id_all)
1187 1188
		goto out;

1189
	sample_type = first->core.attr.sample_type;
1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
1205 1206 1207

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
1208 1209 1210 1211
out:
	return size;
}

1212
bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
1213
{
1214
	struct evsel *first = evlist__first(evlist), *pos = first;
1215

1216
	evlist__for_each_entry_continue(evlist, pos) {
1217
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
1218
			return false;
1219 1220
	}

1221 1222 1223
	return true;
}

1224
/*
 * Whether sample_id_all is set, using the first event as representative
 * of the whole list (see perf_evlist__valid_sample_id_all()).
 */
bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	return first->core.attr.sample_id_all;
}
1229

1230
/* Remember @evsel as the evlist's currently selected event. */
void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}
1235

1236
/*
 * Close all events' file descriptors, in the reverse of the order in
 * which they appear in (and were opened from) the list.
 */
void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

1244
static int perf_evlist__create_syswide_maps(struct evlist *evlist)
1245
{
1246
	struct perf_cpu_map *cpus;
1247
	struct perf_thread_map *threads;
1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
1259
	cpus = perf_cpu_map__new(NULL);
1260
	if (!cpus)
1261 1262
		goto out;

1263
	threads = perf_thread_map__new_dummy();
1264 1265
	if (!threads)
		goto out_put;
1266

1267
	perf_evlist__set_maps(&evlist->core, cpus, threads);
1268 1269
out:
	return err;
1270
out_put:
1271
	perf_cpu_map__put(cpus);
1272 1273 1274
	goto out;
}

1275
/*
 * Open all events on their respective cpu/thread maps, creating default
 * system wide maps first if the caller set neither. On failure every
 * event opened so far is closed, errno is set from the error, and the
 * negative error code is returned.
 */
int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	/* err is negative; expose it to callers that look at errno too. */
	errno = -err;
	return err;
}
1304

1305
/*
 * Fork the workload described by @argv, but leave it "corked": the
 * child blocks on a pipe until perf_evlist__start_workload() writes a
 * byte to it (or cancels by closing the fd), so events can be set up
 * before the workload runs a single instruction.
 *
 * @pipe_output: redirect the child's stdout to stderr.
 * @exec_error:  if non-NULL, installed as a SIGUSR1 handler in the
 *               parent; the child signals exec failure via sigqueue()
 *               with errno as the payload.
 *
 * Returns 0 on success (child pid in evlist->workload.pid, cork fd in
 * evlist->workload.cork_fd), -1 on failure.
 */
int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	/* Child: synchronize with the parent, then exec the workload. */
	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		/* Only reached if execvp() failed. */
		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	/* Parent: arrange to hear about exec failures, if requested. */
	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

1419
int perf_evlist__start_workload(struct evlist *evlist)
1420 1421
{
	if (evlist->workload.cork_fd > 0) {
1422
		char bf = 0;
1423
		int ret;
1424 1425 1426
		/*
		 * Remove the cork, let it rip!
		 */
1427 1428
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
1429
			perror("unable to write to pipe");
1430 1431 1432

		close(evlist->workload.cork_fd);
		return ret;
1433 1434 1435 1436
	}

	return 0;
}
1437

1438
int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
1439
			      struct perf_sample *sample)
1440
{
1441
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
1442 1443 1444

	if (!evsel)
		return -EFAULT;
1445
	return perf_evsel__parse_sample(evsel, event, sample);
1446
}
1447

1448
/*
 * Like perf_evlist__parse_sample(), but only extract the timestamp of
 * the record into @timestamp.
 *
 * Returns -EFAULT if no event in the list matches the record.
 */
int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (evsel == NULL)
		return -EFAULT;

	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

1459
/*
 * Format a user-friendly explanation of an evlist__open() failure into
 * @buf, with hints for the common EACCES/EPERM (perf_event_paranoid)
 * and EINVAL (perf_event_max_sample_rate) cases; any other error just
 * gets its strerror() text. Always returns 0.
 */
int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		/* Only diagnose EINVAL caused by a too-high sample_freq. */
		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}
1512

1513
int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
1514
{
1515
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1516
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
1517 1518 1519

	switch (err) {
	case EPERM:
1520
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1521 1522
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
1523
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1524
				     "Hint:\tTried using %zd kB.\n",
1525
				     emsg, pages_max_per_user, pages_attempted);
1526 1527 1528 1529 1530 1531 1532 1533 1534

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1535 1536 1537 1538 1539 1540 1541 1542 1543
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

1544
/*
 * Move @move_evsel's whole group (every event sharing its leader) to
 * the front of the evlist, preserving the group members' relative
 * order. No-op if @move_evsel is already the first event.
 */
void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	/* Collect the group on a temporary list... */
	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	/* ...then splice it in at the head. */
	list_splice(&move, &evlist->core.entries);
}
1560

1561
void perf_evlist__set_tracking_event(struct evlist *evlist,
1562
				     struct evsel *tracking_evsel)
1563
{
1564
	struct evsel *evsel;
1565 1566 1567 1568

	if (tracking_evsel->tracking)
		return;

1569
	evlist__for_each_entry(evlist, evsel) {
1570 1571 1572 1573 1574 1575
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}
1576

1577
struct evsel *
1578
perf_evlist__find_evsel_by_str(struct evlist *evlist,
1579 1580
			       const char *str)
{
1581
	struct evsel *evsel;
1582

1583
	evlist__for_each_entry(evlist, evsel) {
1584 1585 1586 1587 1588 1589 1590 1591
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}
1592

1593
/*
 * Drive the state machine for backward (overwrite) ring buffers:
 *
 *   NOTREADY -> RUNNING       (initial start)
 *   RUNNING  -> DATA_PENDING  (pause the ring so it can be read)
 *   DATA_PENDING -> EMPTY     (data consumed)
 *   EMPTY    -> RUNNING       (resume the ring)
 *
 * Any other transition is rejected (state is left unchanged). No-op
 * when the evlist has no overwrite mmaps.
 */
void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	/* Validate the transition and pick the ring buffer action. */
	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
1650

1651
bool perf_evlist__exclude_kernel(struct evlist *evlist)
1652
{
1653
	struct evsel *evsel;
1654 1655

	evlist__for_each_entry(evlist, evsel) {
1656
		if (!evsel->core.attr.exclude_kernel)
1657 1658 1659 1660 1661
			return false;
	}

	return true;
}
1662 1663 1664 1665 1666 1667

/*
 * Events in data file are not collect in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
1668
void perf_evlist__force_leader(struct evlist *evlist)
1669 1670
{
	if (!evlist->nr_groups) {
1671
		struct evsel *leader = evlist__first(evlist);
1672 1673 1674 1675 1676

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}
1677

1678
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
1679
						 struct evsel *evsel)
1680
{
1681
	struct evsel *c2, *leader;
1682 1683 1684 1685
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
1686
			leader->name, leader->core.nr_members);
1687 1688 1689 1690 1691 1692 1693 1694 1695 1696

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
1697
				perf_evsel__close(&evsel->core);
1698
			c2->leader = c2;
1699
			c2->core.nr_members = 0;
1700 1701 1702 1703
		}
	}
	return leader;
}
1704

1705
/*
 * Add a side-band event described by @attr to *@evlist, allocating the
 * evlist itself if *@evlist is NULL. @cb/@data are stored on the evsel
 * and invoked by the side-band poll thread for each matching record.
 * sample_id_all is forced on so records can be routed to their evsel.
 *
 * Returns 0 on success, -1 on failure (a freshly allocated evlist is
 * deleted again and *@evlist reset to NULL on the error path).
 */
int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	/* Only tear down an evlist we allocated ourselves. */
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

/*
 * Side-band poll thread body: poll the evlist's mmaps, dispatch each
 * record to its evsel's side_band callback, and drain any remaining
 * records after evlist->thread.done is set before exiting.
 */
static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf to needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		/* Once asked to stop, keep reading until the rings are empty. */
		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

1789
/*
 * Create maps for @target, open/mmap/enable all side-band events and
 * spawn the poll thread that dispatches their records.
 *
 * Returns 0 on success (or when @evlist is NULL, i.e. no side-band
 * events were requested), -1 on failure, deleting the evlist.
 */
int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	/*
	 * NOTE(review): this only clears the local copy of the pointer;
	 * the caller's evlist pointer is left dangling — verify callers
	 * don't reuse it after a failure here.
	 */
	evlist = NULL;
	return -1;
}

1826
/*
 * Ask the side-band poll thread to drain and exit, wait for it, then
 * delete the evlist. Safe to call with a NULL evlist.
 */
void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}