evlist.c 41.0 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
9
#include "util.h"
10
#include <api/fs/fs.h>
11
#include <errno.h>
12
#include <inttypes.h>
13
#include <poll.h>
14 15
#include "cpumap.h"
#include "thread_map.h"
16
#include "target.h"
17 18
#include "evlist.h"
#include "evsel.h"
A
Adrian Hunter 已提交
19
#include "debug.h"
20
#include "units.h"
21
#include "asm/bug.h"
22
#include <signal.h>
23
#include <unistd.h>
24

25
#include "parse-events.h"
26
#include <subcmd/parse-options.h>
27

28
#include <sys/ioctl.h>
29 30
#include <sys/mman.h>

31 32
#include <linux/bitops.h>
#include <linux/hash.h>
33
#include <linux/log2.h>
34
#include <linux/err.h>
35

36
/*
 * Accessors for the per-(x, y) slots of an evsel's xyarrays.  Callers in this
 * file pass (cpu index, thread index) as (x, y).
 *
 * FD()  - lvalue of the int stored in evsel->fd at (x, y).
 * SID() - the entry pointer stored in evsel->sample_id at (x, y).
 *
 * Every argument is parenthesised so that non-trivial expressions such as
 * FD(evsels + 1, x, y) expand correctly.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, (x), (y)))
#define SID(e, x, y) xyarray__entry((e)->sample_id, (x), (y))
38

39 40
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
41 42 43 44 45 46
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
47
	perf_evlist__set_maps(evlist, cpus, threads);
48
	fdarray__init(&evlist->pollfd, 64);
49
	evlist->workload.pid = -1;
50
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
51 52
}

53
struct perf_evlist *perf_evlist__new(void)
54 55 56
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

57
	if (evlist != NULL)
58
		perf_evlist__init(evlist, NULL, NULL);
59 60 61 62

	return evlist;
}

63 64 65 66 67 68 69 70 71 72 73 74
/*
 * Create an evlist and add the default event to it via
 * perf_evlist__add_default().  On any failure the partially built evlist is
 * deleted and NULL is returned.
 */
struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist == NULL)
		return NULL;

	if (perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}

75 76 77 78 79 80 81 82 83 84 85 86
/*
 * Create an evlist holding a single dummy event (see
 * perf_evlist__add_dummy()).  On any failure the partially built evlist is
 * deleted and NULL is returned.
 */
struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist == NULL)
		return NULL;

	if (perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all share the same id_pos and
 * is_pos, so the values of the first evsel are cached on the evlist
 * for convenience.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *head_evsel;

	head_evsel = perf_evlist__first(evlist);

	evlist->is_pos = head_evsel->is_pos;
	evlist->id_pos = head_evsel->id_pos;
}

102 103 104 105
static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

106
	evlist__for_each_entry(evlist, evsel)
107 108 109 110 111
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

112 113 114 115
static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

116
	evlist__for_each_entry_safe(evlist, n, pos) {
117
		list_del_init(&pos->node);
118
		pos->evlist = NULL;
119 120 121 122 123 124
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

125
void perf_evlist__exit(struct perf_evlist *evlist)
126
{
127
	zfree(&evlist->mmap);
128
	zfree(&evlist->overwrite_mmap);
129
	fdarray__exit(&evlist->pollfd);
130 131 132 133
}

/*
 * Tear down and free an evlist: unmap its ring buffers, close its events,
 * drop its cpu/thread map references, delete all evsels and finally free
 * the evlist itself.  A NULL evlist is accepted and ignored.
 */
void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (!evlist)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);

	cpu_map__put(evlist->cpus);
	evlist->cpus = NULL;

	thread_map__put(evlist->threads);
	evlist->threads = NULL;

	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);

	free(evlist);
}

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/*
 * Point one evsel at the cpu and thread maps it should use.
 *
 * An evsel that already has its own cpus (via PMU sysfs) keeps them unless
 * the user supplied a target cpu list; otherwise the evlist's cpus are used.
 * The thread map always comes from the evlist.
 */
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	const bool use_evlist_cpus = !evsel->own_cpus || evlist->has_user_cpus;
	struct cpu_map *wanted;

	if (use_evlist_cpus)
		wanted = evlist->cpus;
	else
		wanted = evsel->own_cpus;

	/*
	 * The evlist's cpus are always (re)applied; own_cpus only when the
	 * evsel is not already pointing at them.
	 */
	if (use_evlist_cpus || evsel->cpus != wanted) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(wanted);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

171
	evlist__for_each_entry(evlist, evsel)
172 173 174
		__perf_evlist__propagate_maps(evlist, evsel);
}

175 176
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
177
	entry->evlist = evlist;
178
	list_add_tail(&entry->node, &evlist->entries);
179
	entry->idx = evlist->nr_entries;
180
	entry->tracking = !entry->idx;
181

182 183
	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);
184 185

	__perf_evlist__propagate_maps(evlist, entry);
186 187
}

188 189 190 191 192 193 194
/*
 * Unlink @evsel from @evlist and decrement the entry count.  The evsel is
 * not deleted here.
 */
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	list_del_init(&evsel->node);
	evsel->evlist = NULL;

	evlist->nr_entries--;
}

195
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
196
				   struct list_head *list)
197
{
198
	struct perf_evsel *evsel, *temp;
199

200
	__evlist__for_each_entry_safe(list, temp, evsel) {
201 202 203
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
204 205
}

206 207 208 209 210
void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
211 212 213
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;
214

215
	__evlist__for_each_entry(list, evsel) {
216
		evsel->leader = leader;
217 218 219 220
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
221
{
222 223
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
224
		__perf_evlist__set_leader(&evlist->entries);
225
	}
226 227
}

228
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
229 230 231 232 233 234 235 236 237 238 239 240 241
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

242
/*
 * Create a cycles evsel (see perf_evsel__new_cycles()) and append it to the
 * evlist.  Returns 0 on success or -ENOMEM if the evsel cannot be created.
 */
int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
	struct perf_evsel *cycles;

	cycles = perf_evsel__new_cycles(precise);
	if (!cycles)
		return -ENOMEM;

	perf_evlist__add(evlist, cycles);

	return 0;
}
252

253 254 255 256 257 258 259
int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
260
	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);
261 262 263 264 265 266 267 268

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

269 270
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
271 272 273 274 275 276
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
277
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
278 279 280 281 282
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

283
	perf_evlist__splice_list_tail(evlist, &head);
284 285 286 287

	return 0;

out_delete_partial_list:
288
	__evlist__for_each_entry_safe(&head, n, evsel)
289 290 291 292
		perf_evsel__delete(evsel);
	return -1;
}

293 294 295 296 297 298 299 300 301 302 303
/*
 * Run event_attr_init() over each of the @nr_attrs attributes in place, then
 * add an evsel for each of them to @evlist.  Returns whatever
 * perf_evlist__add_attrs() returns.
 */
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t idx = 0;

	while (idx < nr_attrs) {
		event_attr_init(&attrs[idx]);
		idx++;
	}

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

304 305
struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
306 307 308
{
	struct perf_evsel *evsel;

309
	evlist__for_each_entry(evlist, evsel) {
310 311 312 313 314 315 316 317
		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

318 319 320 321 322 323
struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

324
	evlist__for_each_entry(evlist, evsel) {
325 326 327 328 329 330 331 332
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

333 334 335
int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
336
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
337

338
	if (IS_ERR(evsel))
339 340
		return -1;

341
	evsel->handler = handler;
342 343 344 345
	perf_evlist__add(evlist, evsel);
	return 0;
}

346 347 348 349 350 351 352 353 354
/*
 * Number of thread slots used by @evsel: a system-wide evsel has exactly
 * one, any other evsel has one per thread in the evlist's thread map.
 */
static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	return evsel->system_wide ? 1 : thread_map__nr(evlist->threads);
}

355 356 357
void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;
358

359
	evlist__for_each_entry(evlist, pos) {
360 361 362
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
363
	}
364 365

	evlist->enabled = false;
366 367
}

368 369 370
void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;
371

372
	evlist__for_each_entry(evlist, pos) {
373 374 375
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
376
	}
377 378 379 380 381 382 383

	evlist->enabled = true;
}

/* Flip the evlist between enabled and disabled based on evlist->enabled. */
void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	if (evlist->enabled)
		perf_evlist__disable(evlist);
	else
		perf_evlist__enable(evlist);
}

386 387 388
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
389
	int thread;
390 391 392 393 394 395
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
396
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
397 398 399 400 401 402 403 404 405 406
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
407
	int cpu;
408 409 410 411 412 413
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
414
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
		if (err)
			return err;
	}
	return 0;
}

/*
 * Enable @evsel for mmap index @idx.  With a non-empty cpu map the index is
 * treated as a cpu index, otherwise as a thread index.
 */
int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	if (cpu_map__empty(evlist->cpus))
		return perf_evlist__enable_event_thread(evlist, evsel, idx);

	return perf_evlist__enable_event_cpu(evlist, evsel, idx);
}

432
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
433
{
434 435
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
436 437 438
	int nfds = 0;
	struct perf_evsel *evsel;

439
	evlist__for_each_entry(evlist, evsel) {
440 441 442 443 444 445
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

446 447
	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
448 449 450
		return -ENOMEM;

	return 0;
451
}
452

453 454
static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
455
{
456
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
457 458 459 460 461
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
462
		evlist->pollfd.priv[pos].ptr = map;
463 464 465 466 467 468 469

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

470
/*
 * Add @fd to the pollfd array for POLLIN, with no mmap associated.
 * Returns the slot index, or a negative value on failure.
 */
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	struct perf_mmap *no_map = NULL;

	return __perf_evlist__add_pollfd(evlist, fd, no_map, POLLIN);
}

475 476
/*
 * fdarray__filter() callback: drop the reference on the mmap recorded for
 * slot @fd, if one was recorded.
 */
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (!map)
		return;

	perf_mmap__put(map);
}
483

484 485
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
486
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
487
			       perf_evlist__munmap_filtered, NULL);
488 489
}

490 491
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
492
	return fdarray__poll(&evlist->pollfd, timeout);
493 494
}

495 496 497
/*
 * Fill in the sample-id entry of @evsel at (@cpu, @thread) with @id and
 * insert it into the evlist's id hash table.
 */
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	int bucket;

	sid->evsel = evsel;
	sid->id = id;

	bucket = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[bucket]);
}

508 509 510 511 512 513 514
/*
 * Record @id for @evsel at (@cpu, @thread): hash it into the evlist and
 * append it to the evsel's id array.
 */
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);

	evsel->id[evsel->ids] = id;
	evsel->ids++;
}

J
Jiri Olsa 已提交
515 516 517
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
518 519
{
	u64 read_data[4] = { 0, };
520
	int id_idx = 1; /* The first entry is the counter value */
521 522 523 524 525 526 527 528 529 530 531
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */
532

533 534 535 536 537 538 539
	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

540 541 542 543 544 545 546 547 548
	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

549 550 551 552
	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
553 554 555
	return 0;
}

A
Adrian Hunter 已提交
556 557 558 559 560 561 562 563 564 565 566
/*
 * Fill in the location fields of the sample-id entry at (@cpu, @thread):
 * the mmap index, the real cpu number and the tid.  cpu is -1 when there is
 * no cpu map or @cpu is negative; tid is -1 for system-wide evsels, when
 * there is no thread map, or when @thread is negative.
 */
static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	const bool have_cpu = evlist->cpus && cpu >= 0;
	const bool have_tid = !evsel->system_wide && evlist->threads &&
			      thread >= 0;

	sid->idx = idx;

	if (have_cpu)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;

	if (have_tid)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

572
/*
 * Look up the sample-id entry registered for @id in the evlist's hash
 * table.  Returns NULL when the id is unknown.
 */
struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	int bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);
	struct hlist_head *head = &evlist->heads[bucket];
	struct perf_sample_id *sid;

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid;
	}

	return NULL;
}

/*
 * Map a sample id to its evsel, with lenient fallbacks: a single-entry
 * evlist or a zero id resolves to the first evsel, and so does an unknown id
 * when the evlist does not have sample_id_all.  Otherwise an unknown id
 * yields NULL.
 */
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid != NULL)
		return sid->evsel;

	if (perf_evlist__sample_id_all(evlist))
		return NULL;

	return perf_evlist__first(evlist);
}
604

605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
/*
 * Map a sample id to its evsel without any fallback: a zero or unknown id
 * yields NULL.
 */
struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid = NULL;

	if (id)
		sid = perf_evlist__id2sid(evlist, id);

	return sid ? sid->evsel : NULL;
}

620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
/*
 * Extract the sample id from @event into @id.
 *
 * The payload after the header is treated as an array of u64.  For
 * PERF_RECORD_SAMPLE the id sits id_pos words from the start; for every
 * other record type it sits is_pos words from the end.
 *
 * Returns 0 on success, -1 when the record is too short to hold the id.
 */
static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t nr_words;
	ssize_t slot;

	nr_words = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= nr_words)
			return -1;
		slot = evlist->id_pos;
	} else {
		if (evlist->is_pos > nr_words)
			return -1;
		slot = nr_words - evlist->is_pos;
	}

	*id = array[slot];
	return 0;
}

J
Jiri Olsa 已提交
641 642
struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
					    union perf_event *event)
643
{
644
	struct perf_evsel *first = perf_evlist__first(evlist);
645 646 647 648 649 650
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
651 652 653 654 655
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;
656 657 658 659 660 661

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
662
		return first;
663 664 665 666 667 668 669 670 671 672 673

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

W
Wang Nan 已提交
674 675 676 677
/*
 * Pause (@value true) or resume (@value false) output on every overwrite
 * mmap that has a valid fd.
 *
 * Returns 0 when there are no overwrite mmaps or all ioctls succeed,
 * otherwise the first non-zero ioctl() result.
 */
static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	if (evlist->overwrite_mmap == NULL)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;

		if (fd >= 0) {
			int err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT,
					value ? 1 : 0);

			if (err)
				return err;
		}
	}

	return 0;
}

694
/* Pause output on the overwrite mmaps; see perf_evlist__set_paused(). */
static int perf_evlist__pause(struct perf_evlist *evlist)
{
	const bool paused = true;

	return perf_evlist__set_paused(evlist, paused);
}

699
/* Resume output on the overwrite mmaps; see perf_evlist__set_paused(). */
static int perf_evlist__resume(struct perf_evlist *evlist)
{
	const bool paused = false;

	return perf_evlist__set_paused(evlist, paused);
}

704 705 706 707 708 709 710 711 712 713
/*
 * Read the next event, in forward direction, from the regular mmap at
 * index @idx.
 */
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
	/*
	 * A forward overwritable ring buffer needs a check for corruption,
	 * since the memory pointed to by md->prev can be overwritten there.
	 * A read-write ring buffer does not: the kernel stops outputting
	 * when it hits md->prev (perf_mmap__consume()).
	 */
	return perf_mmap__read_forward(&evlist->mmap[idx]);
}

/*
 * Read the next event, in backward direction, from the regular mmap at
 * index @idx.
 */
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
	/*
	 * No corruption check is needed for a backward ring buffer:
	 * arbitrarily long data can always be read from it, unless we
	 * forget to pause it before reading.
	 */
	return perf_mmap__read_backward(&evlist->mmap[idx]);
}

729 730
/* Default mmap read: identical to perf_evlist__mmap_read_forward(). */
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	union perf_event *event = perf_evlist__mmap_read_forward(evlist, idx);

	return event;
}

734 735 736 737 738 739 740
/* Run perf_mmap__read_catchup() on the regular mmap at index @idx. */
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	perf_mmap__read_catchup(md);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
W
Wang Nan 已提交
741
	perf_mmap__consume(&evlist->mmap[idx], false);
742 743
}

744
static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
745
{
746
	int i;
747

748 749 750
	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
751

752
	if (evlist->overwrite_mmap)
753
		for (i = 0; i < evlist->nr_mmaps; i++)
754
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
755
}
756

757 758 759
void perf_evlist__munmap(struct perf_evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
760
	zfree(&evlist->mmap);
761
	zfree(&evlist->overwrite_mmap);
762 763
}

764
static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
765
{
W
Wang Nan 已提交
766
	int i;
767
	struct perf_mmap *map;
W
Wang Nan 已提交
768

769
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
770
	if (cpu_map__empty(evlist->cpus))
771
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
772 773 774
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;
775

776
	for (i = 0; i < evlist->nr_mmaps; i++) {
777
		map[i].fd = -1;
778 779 780 781 782 783 784 785 786 787 788
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_evlist__mmap_consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
789
	return map;
790 791
}

792 793 794 795
/*
 * Whether @evsel's fd should be polled for POLLIN: true unless the evsel
 * uses a write_backward ring buffer.
 */
static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	return !evsel->attr.write_backward;
}

801
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
802
				       struct mmap_params *mp, int cpu_idx,
803
				       int thread, int *_output, int *_output_overwrite)
804 805
{
	struct perf_evsel *evsel;
806
	int revent;
807
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
808

809
	evlist__for_each_entry(evlist, evsel) {
810 811
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
812
		int fd;
813
		int cpu;
814

W
Wang Nan 已提交
815
		mp->prot = PROT_READ | PROT_WRITE;
816
		if (evsel->attr.write_backward) {
817 818
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;
819 820 821 822 823

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist);
				if (!maps)
					return -1;
824
				evlist->overwrite_mmap = maps;
825 826
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
827
			}
W
Wang Nan 已提交
828
			mp->prot &= ~PROT_WRITE;
829
		}
830

831 832 833
		if (evsel->system_wide && thread)
			continue;

834 835 836 837
		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

838
		fd = FD(evsel, cpu, thread);
839 840 841

		if (*output == -1) {
			*output = fd;
842 843

			if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
844 845 846 847
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;
848

849
			perf_mmap__get(&maps[idx]);
850 851
		}

852 853
		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

854 855 856 857 858 859 860 861
		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
862 863
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
864
			return -1;
865
		}
866

A
Adrian Hunter 已提交
867 868 869 870 871 872 873
		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
874 875 876 877 878
	}

	return 0;
}

879 880
static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
881
{
882
	int cpu, thread;
883 884
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
885

A
Adrian Hunter 已提交
886
	pr_debug2("perf event ring buffer mmapped per cpu\n");
887
	for (cpu = 0; cpu < nr_cpus; cpu++) {
888
		int output = -1;
889
		int output_overwrite = -1;
890

891 892 893
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

894
		for (thread = 0; thread < nr_threads; thread++) {
895
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
896
							thread, &output, &output_overwrite))
897
				goto out_unmap;
898 899 900 901 902 903
		}
	}

	return 0;

out_unmap:
904
	perf_evlist__munmap_nofree(evlist);
905 906 907
	return -1;
}

908 909
static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
910 911
{
	int thread;
912
	int nr_threads = thread_map__nr(evlist->threads);
913

A
Adrian Hunter 已提交
914
	pr_debug2("perf event ring buffer mmapped per thread\n");
915
	for (thread = 0; thread < nr_threads; thread++) {
916
		int output = -1;
917
		int output_overwrite = -1;
918

919 920 921
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

922
		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
923
						&output, &output_overwrite))
924
			goto out_unmap;
925 926 927 928 929
	}

	return 0;

out_unmap:
930
	perf_evlist__munmap_nofree(evlist);
931 932 933
	return -1;
}

934
unsigned long perf_event_mlock_kb_in_pages(void)
935
{
936 937
	unsigned long pages;
	int max;
938

939 940 941 942 943 944 945 946 947 948
	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}
949

950 951 952 953 954 955 956
	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

957
size_t perf_evlist__mmap_size(unsigned long pages)
958 959 960 961
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
962 963 964 965 966
		return 0;

	return (pages + 1) * page_size;
}

967 968
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
969
{
970
	unsigned long pages, val;
971 972 973 974 975 976 977
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
978

979
	if (str == NULL)
980
		return -EINVAL;
981

982
	val = parse_tag_value(str, tags);
983
	if (val != (unsigned long) -1) {
984 985 986 987 988 989
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
990 991
		if (*eptr != '\0')
			return -EINVAL;
992 993
	}

994
	if (pages == 0 && min == 0) {
995
		/* leave number of pages at 0 */
996
	} else if (!is_power_of_2(pages)) {
997 998
		char buf[100];

999
		/* round pages up to next power of 2 */
1000
		pages = roundup_pow_of_two(pages);
1001 1002
		if (!pages)
			return -EINVAL;
1003 1004 1005 1006

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
1007 1008
	}

1009 1010 1011 1012 1013 1014
	if (pages > max)
		return -EINVAL;

	return pages;
}

1015
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1016 1017 1018 1019
{
	unsigned long max = UINT_MAX;
	long pages;

A
Adrian Hunter 已提交
1020
	if (max > SIZE_MAX / page_size)
1021 1022 1023 1024 1025
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
1026 1027 1028 1029 1030 1031 1032
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

1033 1034 1035 1036 1037 1038
/*
 * Option-parser callback for the mmap pages option: parses @str into the
 * storage referenced by opt->value.
 */
int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	unsigned int *mmap_pages = opt->value;

	return __perf_evlist__parse_mmap_pages(mmap_pages, str);
}

1039
/**
1040
 * perf_evlist__mmap_ex - Create mmaps to receive events.
1041 1042 1043
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
1044 1045
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
1046
 *
1047 1048 1049
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
1050
 *
1051 1052 1053
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
1054
 * Return: %0 on success, negative error code otherwise.
1055
 */
1056
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1057
			 unsigned int auxtrace_pages,
1058
			 bool auxtrace_overwrite)
1059
{
1060
	struct perf_evsel *evsel;
1061 1062
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
W
Wang Nan 已提交
1063 1064 1065 1066 1067 1068
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp;
1069

1070 1071 1072
	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist);
	if (!evlist->mmap)
1073 1074
		return -ENOMEM;

1075
	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1076 1077
		return -ENOMEM;

1078
	evlist->mmap_len = perf_evlist__mmap_size(pages);
1079
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
1080
	mp.mask = evlist->mmap_len - page_size - 1;
1081

1082 1083 1084
	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

1085
	evlist__for_each_entry(evlist, evsel) {
1086
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1087
		    evsel->sample_id == NULL &&
1088
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1089 1090 1091
			return -ENOMEM;
	}

1092
	if (cpu_map__empty(cpus))
1093
		return perf_evlist__mmap_per_thread(evlist, &mp);
1094

1095
	return perf_evlist__mmap_per_cpu(evlist, &mp);
1096
}
1097

1098
/*
 * Create the event mmaps with @pages pages each and no auxtrace area; see
 * perf_evlist__mmap_ex().
 */
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
	const unsigned int no_auxtrace_pages = 0;

	return perf_evlist__mmap_ex(evlist, pages, no_auxtrace_pages, false);
}

1103
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1104
{
1105 1106
	struct cpu_map *cpus;
	struct thread_map *threads;
1107

1108
	threads = thread_map__new_str(target->pid, target->tid, target->uid);
1109

1110
	if (!threads)
1111 1112
		return -1;

1113
	if (target__uses_dummy_map(target))
1114
		cpus = cpu_map__dummy_new();
1115
	else
1116
		cpus = cpu_map__new(target->cpu_list);
1117

1118
	if (!cpus)
1119 1120
		goto out_delete_threads;

1121 1122
	evlist->has_user_cpus = !!target->cpu_list;

1123
	perf_evlist__set_maps(evlist, cpus, threads);
1124 1125

	return 0;
1126 1127

out_delete_threads:
1128
	thread_map__put(threads);
1129 1130 1131
	return -1;
}

1132 1133
void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
1134
{
1135 1136 1137 1138 1139 1140 1141 1142
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
1143
		cpu_map__put(evlist->cpus);
1144
		evlist->cpus = cpu_map__get(cpus);
1145
	}
1146

1147
	if (threads != evlist->threads) {
1148
		thread_map__put(evlist->threads);
1149
		evlist->threads = thread_map__get(threads);
1150
	}
1151

1152
	perf_evlist__propagate_maps(evlist);
1153 1154
}

1155 1156 1157 1158 1159
void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

1160
	evlist__for_each_entry(evlist, evsel)
1161 1162 1163 1164 1165 1166 1167 1168
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

1169
	evlist__for_each_entry(evlist, evsel)
1170 1171 1172
		__perf_evsel__reset_sample_bit(evsel, bit);
}

1173
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1174 1175
{
	struct perf_evsel *evsel;
1176
	int err = 0;
1177

1178
	evlist__for_each_entry(evlist, evsel) {
1179
		if (evsel->filter == NULL)
1180
			continue;
1181

1182 1183 1184 1185
		/*
		 * filters only work for tracepoint event, which doesn't have cpu limit.
		 * So evlist and evsel should always be same.
		 */
1186
		err = perf_evsel__apply_filter(evsel, evsel->filter);
1187 1188
		if (err) {
			*err_evsel = evsel;
1189
			break;
1190
		}
1191 1192
	}

1193 1194 1195 1196 1197 1198 1199 1200
	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

1201
	evlist__for_each_entry(evlist, evsel) {
1202 1203 1204
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

1205
		err = perf_evsel__set_filter(evsel, filter);
1206 1207 1208 1209 1210
		if (err)
			break;
	}

	return err;
1211
}
1212

1213
/*
 * Build the filter expression
 *
 *	"common_pid != P0 && common_pid != P1 && ..."
 *
 * from the @npids entries of @pids and hand it to perf_evlist__set_filter().
 *
 * Returns the result of perf_evlist__set_filter(), or -1 if the expression
 * could not be allocated.  With @npids == 0 there is no expression to set;
 * nothing is applied and -1 is returned.
 */
int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	/*
	 * Must start out NULL: with npids == 0 the loop below never assigns
	 * it, and it is both tested and free()d afterwards.
	 */
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	/* Only apply a filter that was actually built. */
	if (filter != NULL)
		ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

1240 1241 1242 1243 1244
/*
 * Single-pid form of perf_evlist__set_filter_pids(): passes a one element
 * pid list containing @pid and returns its result.
 */
int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	pid_t single[1];

	single[0] = pid;

	return perf_evlist__set_filter_pids(evlist, 1, single);
}

1245
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1246
{
1247
	struct perf_evsel *pos;
1248

1249 1250 1251 1252 1253 1254
	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

1255
	evlist__for_each_entry(evlist, pos) {
1256 1257
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
1258
			return false;
1259 1260
	}

1261
	return true;
1262 1263
}

1264
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1265
{
1266 1267 1268 1269 1270
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

1271
	evlist__for_each_entry(evlist, evsel)
1272 1273 1274 1275 1276 1277 1278 1279 1280
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

/*
 * Recompute the combined sample type of @evlist: the cached value is
 * cleared first so that __perf_evlist__combined_sample_type() walks the
 * event list again.
 */
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	u64 combined;

	evlist->combined_sample_type = 0;
	combined = __perf_evlist__combined_sample_type(evlist);

	return combined;
}

1283 1284 1285 1286 1287
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

1288
	evlist__for_each_entry(evlist, evsel)
1289 1290 1291 1292
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

1293 1294 1295 1296 1297 1298
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

1299
	evlist__for_each_entry(evlist, pos) {
1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

1319
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1320
{
1321
	struct perf_evsel *first = perf_evlist__first(evlist);
1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
1345 1346 1347

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
1348 1349 1350 1351
out:
	return size;
}

1352
bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1353
{
1354
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1355

1356
	evlist__for_each_entry_continue(evlist, pos) {
1357 1358
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
1359 1360
	}

1361 1362 1363
	return true;
}

1364
bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1365
{
1366
	struct perf_evsel *first = perf_evlist__first(evlist);
1367
	return first->attr.sample_id_all;
1368
}
1369 1370 1371 1372 1373 1374

/*
 * Record @evsel as the selected event of @evlist.  Only the pointer is
 * stored in evlist->selected; nothing else is touched.
 */
void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}
1375

1376 1377 1378 1379
void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

1380 1381
	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
1382 1383
}

1384 1385
/*
 * Give @evlist a cpu map built by cpu_map__new(NULL) and a dummy thread map.
 *
 * Returns 0 on success and -ENOMEM if either map could not be created.
 */
static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map	  *cpus;
	struct thread_map *threads;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		return -ENOMEM;

	threads = thread_map__new_dummy();
	if (!threads) {
		cpu_map__put(cpus);
		return -ENOMEM;
	}

	perf_evlist__set_maps(evlist, cpus, threads);

	/*
	 * perf_evlist__set_maps() stores the maps through cpu_map__get() and
	 * thread_map__get(), so release the constructor references here or
	 * they are leaked.
	 */
	cpu_map__put(cpus);
	thread_map__put(threads);

	/*
	 * Report success explicitly: falling through with the -ENOMEM used
	 * for the failure paths would make every caller treat a successful
	 * setup as an error.
	 */
	return 0;
}

1415
int perf_evlist__open(struct perf_evlist *evlist)
1416
{
1417
	struct perf_evsel *evsel;
1418
	int err;
1419

1420 1421 1422 1423 1424 1425 1426 1427 1428 1429
	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

1430 1431
	perf_evlist__update_id_pos(evlist);

1432
	evlist__for_each_entry(evlist, evsel) {
1433
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
1434 1435 1436 1437 1438 1439
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
1440
	perf_evlist__close(evlist);
1441
	errno = -err;
1442 1443
	return err;
}
1444

1445
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1446
				  const char *argv[], bool pipe_output,
1447
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
1469 1470
		int ret;

1471
		if (pipe_output)
1472 1473
			dup2(2, 1);

1474 1475
		signal(SIGTERM, SIG_DFL);

1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487
		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
1488 1489 1490 1491 1492 1493
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
1494
		 * For cancelling the workload without actually running it,
1495 1496 1497 1498 1499 1500 1501 1502 1503
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}
1504 1505 1506

		execvp(argv[0], (char **)argv);

1507
		if (exec_error) {
1508 1509 1510 1511 1512 1513 1514
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
1515 1516 1517
		exit(-1);
	}

1518 1519 1520 1521 1522 1523 1524 1525
	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

1526 1527 1528 1529 1530 1531
	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
1532
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1533
	}
1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

1545
	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

/*
 * Release a workload that is waiting on evlist->workload.cork_fd by writing
 * one byte to it, then close the descriptor.
 *
 * Returns 0 when cork_fd is not greater than zero (nothing to do),
 * otherwise the return value of write(); a negative value is also reported
 * through perror().
 */
int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	char bf = 0;
	int ret;

	if (evlist->workload.cork_fd <= 0)
		return 0;

	/*
	 * Remove the cork, let it rip!
	 */
	ret = write(evlist->workload.cork_fd, &bf, 1);
	if (ret < 0)
		perror("unable to write to pipe");

	close(evlist->workload.cork_fd);

	return ret;
}
1577

1578
/*
 * Look up the event of @evlist that @event belongs to and let it parse
 * @event into @sample.
 *
 * Returns -EFAULT when no matching event is found, otherwise the result of
 * perf_evsel__parse_sample().
 */
int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	return evsel ? perf_evsel__parse_sample(evsel, event, sample) : -EFAULT;
}
1587

1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598
/*
 * Look up the event of @evlist that @event belongs to and let it extract
 * the timestamp of @event into *@timestamp.
 *
 * Returns -EFAULT when no matching event is found, otherwise the result of
 * perf_evsel__parse_sample_timestamp().
 */
int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	return evsel ? perf_evsel__parse_sample_timestamp(evsel, event, timestamp)
		     : -EFAULT;
}

1599 1600 1601 1602 1603
size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

1604
	evlist__for_each_entry(evlist, evsel) {
1605 1606 1607 1608
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

1609
	return printed + fprintf(fp, "\n");
1610
}
1611

1612
int perf_evlist__strerror_open(struct perf_evlist *evlist,
1613 1614 1615
			       int err, char *buf, size_t size)
{
	int printed, value;
1616
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1617 1618 1619 1620 1621 1622 1623 1624

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

1625
		value = perf_event_paranoid();
1626 1627 1628 1629 1630 1631 1632 1633

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
1634
				     "For system wide tracing it needs to be set to -1.\n");
1635 1636

		printed += scnprintf(buf + printed, size - printed,
1637 1638
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
1639
		break;
1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
1657
	default:
1658
out_default:
1659 1660 1661 1662 1663 1664
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}
1665

1666 1667
int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
1668
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1669
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
1670 1671 1672

	switch (err) {
	case EPERM:
1673
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1674 1675
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
1676
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1677
				     "Hint:\tTried using %zd kB.\n",
1678
				     emsg, pages_max_per_user, pages_attempted);
1679 1680 1681 1682 1683 1684 1685 1686 1687

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1688 1689 1690 1691 1692 1693 1694 1695 1696
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

1697 1698 1699 1700 1701 1702 1703 1704 1705
void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

1706
	evlist__for_each_entry_safe(evlist, n, evsel) {
1707 1708 1709 1710 1711 1712
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}
1713 1714 1715 1716 1717 1718 1719 1720 1721

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

1722
	evlist__for_each_entry(evlist, evsel) {
1723 1724 1725 1726 1727 1728
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}
1729 1730 1731 1732 1733 1734 1735

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

1736
	evlist__for_each_entry(evlist, evsel) {
1737 1738 1739 1740 1741 1742 1743 1744
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}
1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755

/*
 * Move the backward mmap state of @evlist to @state.
 *
 * Nothing is done when evlist->overwrite_mmap is not set.  Only these
 * transitions are accepted, any other request is ignored and leaves the
 * state unchanged:
 *
 *	NOTREADY     -> RUNNING
 *	RUNNING      -> DATA_PENDING	(evlist is paused)
 *	DATA_PENDING -> EMPTY
 *	EMPTY        -> RUNNING		(evlist is resumed)
 *
 * An unknown current state triggers a one-time warning and @state is then
 * stored without pausing or resuming.
 */
void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY:
		if (state != BKW_MMAP_RUNNING)
			return;
		break;
	case BKW_MMAP_RUNNING:
		if (state != BKW_MMAP_DATA_PENDING)
			return;
		action = PAUSE;
		break;
	case BKW_MMAP_DATA_PENDING:
		if (state != BKW_MMAP_EMPTY)
			return;
		break;
	case BKW_MMAP_EMPTY:
		if (state != BKW_MMAP_RUNNING)
			return;
		action = RESUME;
		break;
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	if (action == PAUSE)
		perf_evlist__pause(evlist);
	else if (action == RESUME)
		perf_evlist__resume(evlist);
}
1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814

/*
 * Return true when every event in @evlist has attr.exclude_kernel set
 * (which includes the empty list), false as soon as one event has it clear.
 */
bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->attr.exclude_kernel == 0)
			return false;
	}

	return true;
}