// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/cpumap.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}
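
/*
 * For example, with a group spec like "{cycles,instructions}" both evsels
 * end up on one list: the first entry (cycles) becomes the leader,
 * leader->core.nr_members is 2, and every member's ->leader - including the
 * leader itself - points at the cycles evsel.
 */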

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}
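
/*
 * Minimal caller-side sketch for the helpers above (attrs[] is a
 * hypothetical local array, the real callers live in the perf tools):
 *
 *	struct perf_event_attr attrs[] = {
 *		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
 *		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
 *	};
 *
 *	if (__perf_evlist__add_default_attrs(evlist, attrs, ARRAY_SIZE(attrs)))
 *		return -1;
 *
 * event_attr_init() fills in the common defaults before the attrs are
 * turned into evsels and spliced onto the evlist.
 */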

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}
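
/*
 * Typical caller-side sketch (assumed, not lifted from a specific tool):
 * wait for events, then drop the fds whose other end hung up so the next
 * poll does not spin on them:
 *
 *	if (perf_evlist__poll(evlist, timeout) <= 0)
 *		...
 *	perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP);
 *
 * perf_evlist__filter_pollfd() also drops the mmap reference stashed in
 * pollfd.priv[].ptr by __perf_evlist__add_pollfd().
 */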

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}
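
/*
 * For the legacy path above, read_data[] follows the perf_event_open()
 * read_format layout of a single (non-group) counter:
 *
 *	read_data[0]	counter value
 *	read_data[1]	time_enabled (if PERF_FORMAT_TOTAL_TIME_ENABLED)
 *	read_data[2]	time_running (if PERF_FORMAT_TOTAL_TIME_RUNNING)
 *	read_data[n]	id (PERF_FORMAT_ID)
 *
 * which is why id_idx starts at 1 and is bumped once for each of the
 * TOTAL_TIME_* bits that is set.
 */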

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->core.threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->core.cpus);
	if (cpu_map__empty(evlist->core.cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}
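
/*
 * Worked example, assuming 4 KiB pages: with kernel.perf_event_mlock_kb at
 * 516 kB (a common default), max becomes 516 - 4 = 512 kB, i.e.
 * 512 * 1024 / 4096 = 128 pages, already a power of two, so 128 is
 * returned.  The fallback of 512 kB used when the sysctl can't be read
 * gives the same 128 pages.
 */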

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
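
/*
 * For example, pages == 128 with a 4 KiB page size gives
 * (128 + 1) * 4096 = 528384 bytes; the extra page is the control/header
 * page that precedes the data pages in the perf ring buffer.
 */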

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}
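
/*
 * Examples of what parse_pages_arg() accepts, assuming 4 KiB pages:
 *
 *	"129"	-> not a power of two, rounded up to 256 pages (with a notice)
 *	"512K"	-> 524288 bytes -> 128 pages
 *	"1M"	-> 1048576 bytes -> 256 pages
 *
 * A bare number is a page count; a B/K/M/G suffixed value is a buffer size
 * that is aligned to the page size first.
 */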

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
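
/*
 * Minimal caller-side sketch (assumed, error handling elided): once the
 * events are opened, the ring buffers are usually set up via the simple
 * wrapper and torn down again when done:
 *
 *	if (evlist__open(evlist) < 0)
 *		goto out;
 *	if (perf_evlist__mmap(evlist, mmap_pages) < 0)
 *		goto out;
 *	... consume events from the evlist->mmap[] buffers ...
 *	perf_evlist__munmap(evlist);
 *
 * Passing UINT_MAX as @pages means "size from perf_event_mlock_kb", see
 * perf_evlist__mmap_size() above.
 */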

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If '-a' and '--per-thread' are both given to perf record, perf
	 * record overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is given to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false, thread_map__new_str() doesn't call
	 * thread_map__new_all_cpus(), and perf record's current behavior is
	 * preserved.
	 *
	 * perf stat, however, allows both target->per_thread and
	 * target->system_wide to be true, meaning system-wide per-thread
	 * collection: thread_map__new_str() then calls
	 * thread_map__new_all_cpus() to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}
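
/*
 * Sketch of two common target shapes handed to perf_evlist__create_maps()
 * (field values assumed for illustration, see struct target in target.h):
 *
 *	struct target target = { .pid = "1234" };	  // existing task(s)
 *	struct target target = { .system_wide = true };  // all online CPUs
 *
 * target__uses_dummy_map() then decides whether the events ride on a dummy
 * cpu map (typical for task targets) or on a real one built from
 * target->cpu_list or all online CPUs.
 */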

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have a
		 * cpu limit. So evlist and evsel should always be the same.
		 */
		err = evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}
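
/*
 * Example, a sketch of how a tracer can keep its own activity out of the
 * trace (the exact call site is tool specific):
 *
 *	perf_evlist__set_tp_filter_pid(evlist, getpid());
 *
 * builds the filter string "common_pid != <pid>" and applies it to every
 * PERF_TYPE_TRACEPOINT evsel on the list.
 */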

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
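
/*
 * Caller-side sketch of the workload plumbing above (ordering assumed,
 * error handling elided): the workload is forked early but stays "corked"
 * on go_pipe until the events are set up, so nothing is missed around exec:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	evlist__open(evlist);
 *	perf_evlist__mmap(evlist, mmap_pages);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);	// writes the single cork byte
 */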

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
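
/*
 * Legal bkw_mmap_state transitions, as enforced by the switch above
 * (anything else falls through to state_err and is ignored):
 *
 *	NOTREADY     -> RUNNING		(no action)
 *	RUNNING      -> DATA_PENDING	(PAUSE the overwrite mmaps)
 *	DATA_PENDING -> EMPTY		(no action)
 *	EMPTY        -> RUNNING		(RESUME the overwrite mmaps)
 */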

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				     evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}