// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
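/*
 * Both macros take xyarray positions: x indexes the evsel's cpu map and y its
 * thread map, rather than raw cpu numbers or tids.
 */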

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->core.pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}
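
/*
 * A rough lifecycle sketch for the constructors above (error handling and
 * target/map setup elided):
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	perf_evlist__add_default(evlist);	// one cycles event
 *	evlist__open(evlist);
 *	evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	...
 *	evlist__disable(evlist);
 *	evlist__delete(evlist);			// unmaps, closes and frees
 */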

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->core.pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// Adding a handler for an event not in this evlist, just ignore it.
		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->core.system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		map[i].core.fd = -1;
		map[i].core.overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
636
		 * one extra to let perf_mmap__consume() get the last
637 638 639 640 641 642
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].core.refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->core.system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->core.system_wide &&
		     perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

static int evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
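
/*
 * Worked example: with 4 kB pages and perf_event_mlock_kb at its usual 516 kB,
 * max becomes 512 kB, i.e. 128 pages, so evlist__mmap_size() maps 129 pages
 * (one header page plus a 512 kB data ring).
 */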

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}
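
/*
 * For instance, "512K" is treated as a size and becomes 128 pages on a 4 kB
 * page system, while a bare "100" is a page count and is rounded up to the
 * next power of two (128), with a note printed about the rounding.
 */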

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	mp.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->core.sample_id == NULL &&
		    perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (perf_cpu_map__empty(cpus))
		return evlist__mmap_per_thread(evlist, &mp);

	return evlist__mmap_per_cpu(evlist, &mp);
}

int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
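
/*
 * Minimal mmap usage sketch (assumes the events were already opened with
 * evlist__open(); error handling elided):
 *
 *	evlist__mmap(evlist, UINT_MAX);		// perf_event_mlock_kb sized rings
 *	evlist__enable(evlist);
 *	...	// consume via perf_mmap__read_event() on evlist->mmap[i]
 *	evlist__disable(evlist);
 *	evlist__munmap(evlist);
 */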

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If specify '-a' and '--per-thread' to perf record, perf record
	 * will override '--per-thread'. target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If specify '--per-thread' only to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That will keep perf record's
	 * current behavior.
	 *
	 * For perf stat, it allows the case that target->per_thread and
	 * target->system_wide are all true. It means to collect system-wide
	 * per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have a cpu limit.
		 * So evlist and evsel should always be the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
1099
				return NULL;
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}
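
/*
 * E.g. for two pids this builds "common_pid != 1234 && common_pid != 5678",
 * which perf_evlist__set_tp_filter_pids() below then applies to every
 * tracepoint evsel in the list.
 */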

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
1374
		 * For cancelling the workload without actually running it,
1375 1376 1377 1378 1379 1380 1381 1382 1383
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
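
/*
 * Workload control sketch (argv/target prepared by the caller, error handling
 * elided):
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	evlist__open(evlist);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);	// writes the single "go" byte
 *	...
 *
 * The forked child only execvp()s argv[0] once that byte arrives on the cork
 * fd; closing the fd without writing anything cancels the workload instead.
 */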

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
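
/*
 * Summary of the transitions handled above: NOTREADY -> RUNNING,
 * RUNNING -> DATA_PENDING (pauses the ring), DATA_PENDING -> EMPTY, and
 * EMPTY -> RUNNING (resumes it); a request that doesn't match these jumps to
 * state_err and leaves the state unchanged.
 */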

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(&evsel->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}
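
/*
 * Side-band usage sketch (my_cb/my_data are placeholder names, error handling
 * elided):
 *
 *	struct evlist *sb_evlist = NULL;
 *
 *	perf_evlist__add_sb_event(&sb_evlist, &attr, my_cb, my_data);
 *	perf_evlist__start_sb_thread(sb_evlist, &target);
 *	...	// my_cb() runs from the poll thread for each side-band event
 *	perf_evlist__stop_sb_thread(sb_evlist);
 */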