// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->core.pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}
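
/*
 * Example (editor's illustrative sketch, not from the original file): the
 * typical lifecycle of an evlist built from these helpers:
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *	if (perf_evlist__add_default(evlist) == 0 &&
 *	    evlist__open(evlist) == 0) {
 *		evlist__enable(evlist);
 *		... consume events ...
 *		evlist__disable(evlist);
 *		evlist__close(evlist);
 *	}
 *	evlist__delete(evlist);
 */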

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->core.pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// Adding a handler for an event not in this evlist, just ignore it.
		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}
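
/*
 * Example (editor's illustrative sketch): wiring handlers to tracepoints
 * already present in the evlist; the process_*() handler names here are
 * hypothetical:
 *
 *	const struct evsel_str_handler handlers[] = {
 *		{ "sched:sched_switch", process_sched_switch },
 *		{ "sched:sched_wakeup", process_sched_wakeup },
 *	};
 *
 *	err = __evlist__set_tracepoints_handlers(evlist, handlers,
 *						 ARRAY_SIZE(handlers));
 *
 * Names in the table that aren't in the evlist are silently skipped; only
 * an evsel that already has a handler makes this fail, with -EEXIST.
 */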

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->core.system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i].core, overwrite);
	}

	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->core.system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->core.system_wide &&
		     perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

static int evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}
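
/*
 * Worked example (editor's note): with kernel/perf_event_mlock_kb = 516
 * (the usual 512 kB plus one 4 kB page) and 4 kB pages, max becomes
 * 516 - 4 = 512 kB, so pages = (512 * 1024) / 4096 = 128, which is
 * already a power of 2 and is returned as-is.
 */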

size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}
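
/*
 * Examples (editor's note): the argument is either a page count or a size
 * with a B/K/M/G suffix, rounded up to a power of 2 pages, e.g. with 4 kB
 * pages:
 *
 *	"16"   -> 16 pages
 *	"33"   -> rounded up to 64 pages (with a notice)
 *	"128K" -> 32 pages
 *	"0"    -> -EINVAL, since this option passes min == 1
 */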

/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	mp.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->core.sample_id == NULL &&
		    perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (perf_cpu_map__empty(cpus))
		return evlist__mmap_per_thread(evlist, &mp);

	return evlist__mmap_per_cpu(evlist, &mp);
}
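
/*
 * Example (editor's illustrative sketch): most callers use the plain
 * evlist__mmap() wrapper below; evlist__mmap_ex() is for tools like
 * 'perf record' that also need AUX area buffers, aio blocks, affinity,
 * flush or compression control:
 *
 *	if (evlist__mmap(evlist, mmap_pages) < 0)
 *		return -errno;
 *
 * where mmap_pages is a hypothetical caller-side value; passing UINT_MAX
 * picks the perf_event_mlock_kb based default via evlist__mmap_size().
 */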

int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, perf
	 * record overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false, and thread_map__new_str doesn't call
	 * thread_map__new_all_cpus, which keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows target->per_thread and
	 * target->system_wide to both be true, meaning collect system-wide
	 * per-thread data. In that case thread_map__new_str calls
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}
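
/*
 * Example (editor's note): a target as 'perf record --per-thread
 * --pid 1234' would set it up, before handing it to this function:
 *
 *	struct target target = {
 *		.pid	     = "1234",
 *		.per_thread  = true,
 *		.system_wide = false,
 *	};
 *
 *	if (perf_evlist__create_maps(evlist, &target) < 0)
 *		return -1;
 *
 * With both .per_thread and .system_wide set (only perf stat allows
 * that), thread_map__new_str() enumerates every thread on the system.
 */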

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have
		 * a cpu limit. So evlist and evsel should always be the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter = NULL;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__append_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
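
/*
 * Example (editor's illustrative sketch): the usual cork dance as done by
 * 'perf record': fork the stopped workload first, set up the events, and
 * only then write the single byte that lets the child exec():
 *
 *	if (perf_evlist__prepare_workload(evlist, &target, argv,
 *					  false, exec_failed_cb) < 0)
 *		return -1;
 *	... open, mmap and enable the events ...
 *	perf_evlist__start_workload(evlist);
 *
 * target and exec_failed_cb are hypothetical caller-side names.
 */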

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
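
/*
 * Editor's note: the legal bkw_mmap_state transitions handled above are:
 *
 *	BKW_MMAP_NOTREADY     -> BKW_MMAP_RUNNING      (no ring action)
 *	BKW_MMAP_RUNNING      -> BKW_MMAP_DATA_PENDING (pauses the ring)
 *	BKW_MMAP_DATA_PENDING -> BKW_MMAP_EMPTY        (no ring action)
 *	BKW_MMAP_EMPTY        -> BKW_MMAP_RUNNING      (resumes the ring)
 *
 * Anything else jumps to state_err and leaves bkw_mmap_state unchanged.
 */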

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(&c2->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}