// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "util/mmap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/mmap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->core.pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->core.pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	evlist__purge(evlist);
	evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

int __evlist__set_tracepoints_handlers(struct evlist *evlist,
				       const struct evsel_str_handler *assocs, size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		// Adding a handler for an event not in this evlist, just ignore it.
		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->core.system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(&map->core);
}

int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->core.pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int evlist__poll(struct evlist *evlist, int timeout)
{
	return perf_evlist__poll(&evlist->core, timeout);
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->core.system_wide && evlist->core.threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	if (!perf_evlist__sample_id_all(evlist))
		return evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return container_of(sid->evsel, struct evsel, core);

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->core.heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return container_of(sid->evsel, struct evsel, core);
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].core.fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i].core);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->core.nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i].core);
}

void evlist__munmap(struct evlist *evlist)
{
	evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static void perf_mmap__unmap_cb(struct perf_mmap *map)
{
	struct mmap *m = container_of(map, struct mmap, core);

	mmap__munmap(m);
}

static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
				       bool overwrite)
{
	int i;
	struct mmap *map;

	evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i].core, overwrite, perf_mmap__unmap_cb);
	}

	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->core.prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->core.prot &= ~PROT_WRITE;
		}

		if (evsel->core.system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx].core);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->core.system_wide &&
		     perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx].core);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
			 struct perf_mmap_param *_mp,
			 int idx, bool per_cpu)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
{
	struct evlist *evlist = container_of(_evlist, struct evlist, core);
	struct mmap *maps = evlist->mmap;

	if (overwrite) {
		maps = evlist->overwrite_mmap;

		if (!maps) {
			maps = evlist__alloc_mmap(evlist, true);
			if (!maps)
				return NULL;

			evlist->overwrite_mmap = maps;
			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
				perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
		}
	}

	return &maps[idx].core;
}

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
			  int output, int cpu)
{
	struct mmap *map = container_of(_map, struct mmap, core);
	struct mmap_params *mp = container_of(_mp, struct mmap_params, core);

	return mmap__mmap(map, mp, output, cpu);
}

static int evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

static int evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}
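
/*
 * Worked example for parse_pages_arg() (illustrative only, not part of the
 * original file; assumes a 4096-byte page_size):
 *
 *   parse_pages_arg("512K", 1, max) parses "512K" as a size via the tag
 *   table: 512 KiB = 524288 bytes = 128 pages, already a power of two,
 *   so 128 is returned (provided 128 <= max).
 *
 *   parse_pages_arg("100", 1, max) treats the bare number as a page count;
 *   100 is not a power of two, so it is rounded up to 128 pages and a
 *   "rounding mmap pages size" note is printed.
 */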

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = {
		.nr_cblocks	= nr_cblocks,
		.affinity	= affinity,
		.flush		= flush,
		.comp_level	= comp_level
	};
	struct perf_evlist_mmap_ops ops __maybe_unused = {
		.idx  = perf_evlist__mmap_cb_idx,
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	if (!evlist->mmap)
		evlist->mmap = evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0)
		return -ENOMEM;

	evlist->core.mmap_len = evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->core.mmap_len);
	mp.core.mask = evlist->core.mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->core.sample_id == NULL &&
		    perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (perf_cpu_map__empty(cpus))
		return evlist__mmap_per_thread(evlist, &mp);

	return evlist__mmap_per_cpu(evlist, &mp);
}

int evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
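
/*
 * Illustrative usage sketch (not part of the original file): a minimal
 * consumer of the mmap API above, for an evlist whose maps and fds were
 * already set up with perf_evlist__create_maps() and evlist__open().  The
 * read loop mirrors perf_evlist__poll_thread() further down; the function
 * name is hypothetical.
 */
static int __maybe_unused evlist__mmap_usage_sketch(struct evlist *evlist)
{
	int i;

	if (evlist__mmap(evlist, UINT_MAX))	/* mlock-limit sized buffers */
		return -1;

	evlist__enable(evlist);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &evlist->mmap[i];
		union perf_event *event;

		if (perf_mmap__read_init(&map->core))
			continue;
		while ((event = perf_mmap__read_event(&map->core)) != NULL) {
			/* consume @event here, e.g. via perf_evlist__parse_sample() */
			perf_mmap__consume(&map->core);
		}
		perf_mmap__read_done(&map->core);
	}

	evlist__disable(evlist);
	return 0;
}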

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are specified to perf record, perf
	 * record overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is specified to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That keeps perf record's
	 * current behavior.
	 *
	 * For perf stat, the case that target->per_thread and
	 * target->system_wide are both true is allowed. It means collecting
	 * system-wide per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so evlist and evsel should always be the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	if (filter == NULL)
		return -1;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__append_tp_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

static char *asprintf__tp_filter_pids(size_t npids, pid_t *pids)
{
	char *filter;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return NULL;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	return filter;
out_free:
	free(filter);
	return NULL;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__set_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = asprintf__tp_filter_pids(npids, pids);
	int ret = perf_evlist__append_tp_filter(evlist, filter);

	free(filter);
	return ret;
}

int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
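
/*
 * Illustrative flow (not part of the original file): how the two workload
 * helpers above are typically paired by a tool.  Error paths are elided and
 * both the function name and the argv are placeholders.
 */
static int __maybe_unused evlist__workload_flow_sketch(struct evlist *evlist,
						       struct target *target)
{
	const char *argv[] = { "sleep", "1", NULL };

	/* fork the workload, stopped on the "cork" pipe, before counting starts */
	if (perf_evlist__prepare_workload(evlist, target, argv, false, NULL))
		return -1;

	/* open and mmap the events so nothing is missed once the workload runs */
	if (evlist__open(evlist) < 0 || evlist__mmap(evlist, UINT_MAX) < 0)
		return -1;

	/* write one byte to workload.cork_fd, letting the child exec argv[0] */
	return perf_evlist__start_workload(evlist);
}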

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(&evsel->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			struct mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(&map->core))
				continue;
			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(&map->core);
				got_data = true;
			}
			perf_mmap__read_done(&map->core);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}