evlist.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

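/*
 * Allocate a zeroed evlist and initialize it with empty cpu and thread maps.
 */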
struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->cpus);
	perf_thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

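/*
 * Apply the evlist cpu and thread maps to one evsel, keeping a cpu map the
 * evsel brought from PMU sysfs unless the user asked for a specific cpu list.
 */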
static void __perf_evlist__propagate_maps(struct evlist *evlist,
					  struct evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
	}

	perf_thread_map__put(evsel->threads);
	evsel->threads = perf_thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	perf_evlist__add(&evlist->core, &entry->core);
	entry->evlist = evlist;
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->core.node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

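/*
 * Make the first event on the list the group leader of all events on it and
 * record the group size in leader->nr_members.
 */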
void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

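/*
 * Obtain the event ID for (cpu, thread): preferably via the
 * PERF_EVENT_IOC_ID ioctl, falling back on older kernels to parsing the ID
 * out of a read() of the counter, then add it to the evlist id hash.
 */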
int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

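/*
 * Map an event back to the evsel that generated it, using the sample ID
 * hash when more than one event is being recorded.
 */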
struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
					    union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

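/*
 * Allocate one struct perf_mmap per cpu (or per thread when there is no cpu
 * map); the ring buffers themselves are mmapped later, in
 * perf_evlist__mmap_per_evsel().
 */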
static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

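/*
 * Parse a --mmap-pages style argument: either a number of pages or a size
 * with a B/K/M/G suffix, rounded up to a power of two number of pages.
 */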
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->cpus;
	const struct perf_thread_map *threads = evlist->threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, '-a'
	 * overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * false and thread_map__new_str() doesn't call
	 * thread_map__new_all_cpus(), which keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows target->per_thread and
	 * target->system_wide to both be true, meaning collect system-wide
	 * per-thread data. In that case thread_map__new_str() calls
	 * thread_map__new_all_cpus() to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct evlist *evlist, struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		perf_cpu_map__put(evlist->cpus);
		evlist->cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel cpu maps should always be
		 * the same.
		 */
		err = evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

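/*
 * Fork the workload but keep it corked: the child blocks reading the "go"
 * pipe until perf_evlist__start_workload() writes a byte to it, so the
 * events can be set up before the workload starts running.
 */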
int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

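/*
 * Drive the overwrite (backward) ring buffer state machine:
 * NOTREADY -> RUNNING -> DATA_PENDING -> EMPTY -> RUNNING, pausing the
 * overwrite buffers when data is pending and resuming them once emptied.
 */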
void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in a data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

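/*
 * A weak group failed to open: close the members that were already opened
 * and turn every member, including the former leader, back into an
 * independent event.
 */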
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
						 struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
1858
	evlist__add(*evlist, evsel);
1859 1860 1861 1862
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

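/*
 * Open and mmap the side band events on their own evlist and start the
 * thread that polls them and dispatches each event to its
 * evsel->side_band.cb handler.
 */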
int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->cpus,
				     evlist->threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}