evlist.c 40.6 KB
Newer Older
1 2 3 4 5 6 7 8
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
9
#include "util.h"
10
#include <api/fs/fs.h>
11
#include <errno.h>
12
#include <inttypes.h>
13
#include <poll.h>
14 15
#include "cpumap.h"
#include "thread_map.h"
16
#include "target.h"
17 18
#include "evlist.h"
#include "evsel.h"
A
Adrian Hunter 已提交
19
#include "debug.h"
20
#include "units.h"
21
#include "asm/bug.h"
22
#include <signal.h>
23
#include <unistd.h>
24

25
#include "parse-events.h"
26
#include <subcmd/parse-options.h>
27

28
#include <sys/ioctl.h>
29 30
#include <sys/mman.h>

31 32
#include <linux/bitops.h>
#include <linux/hash.h>
33
#include <linux/log2.h>
34
#include <linux/err.h>
35

36
/*
 * Element accessors for an evsel's fd and sample_id xyarrays.  In this file
 * the (x, y) indices passed are (cpu index, thread index).  The evsel
 * argument is parenthesised so any pointer expression can be passed safely.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
#define SID(e, x, y) xyarray__entry((e)->sample_id, x, y)
38

39 40
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
41 42 43 44 45 46
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
47
	perf_evlist__set_maps(evlist, cpus, threads);
48
	fdarray__init(&evlist->pollfd, 64);
49
	evlist->workload.pid = -1;
50
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
51 52
}

53
struct perf_evlist *perf_evlist__new(void)
54 55 56
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

57
	if (evlist != NULL)
58
		perf_evlist__init(evlist, NULL, NULL);
59 60 61 62

	return evlist;
}

63 64 65 66 67 68 69 70 71 72 73 74
/*
 * Create an evlist and add the default event to it.  If either step fails
 * the evlist (if any) is deleted and NULL is returned.
 */
struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (!evlist)
		return NULL;

	if (perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}

75 76 77 78 79 80 81 82 83 84 85 86
/*
 * Create an evlist and add a dummy event to it.  If either step fails the
 * evlist (if any) is deleted and NULL is returned.
 */
struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (!evlist)
		return NULL;

	if (perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		return NULL;
	}

	return evlist;
}

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all share the same id_pos and is_pos,
 * so the values of the first event are copied onto the evlist for
 * convenience.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *head = perf_evlist__first(evlist);

	evlist->is_pos = head->is_pos;
	evlist->id_pos = head->id_pos;
}

102 103 104 105
static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

106
	evlist__for_each_entry(evlist, evsel)
107 108 109 110 111
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

112 113 114 115
static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

116
	evlist__for_each_entry_safe(evlist, n, pos) {
117
		list_del_init(&pos->node);
118
		pos->evlist = NULL;
119 120 121 122 123 124
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

125
void perf_evlist__exit(struct perf_evlist *evlist)
126
{
127
	zfree(&evlist->mmap);
128
	zfree(&evlist->backward_mmap);
129
	fdarray__exit(&evlist->pollfd);
130 131 132 133
}

/*
 * Tear down and free an evlist.  A NULL @evlist is accepted and ignored.
 * The ring buffers are unmapped and events closed before the map references
 * are dropped, the events purged and the structure freed.
 */
void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (!evlist)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);

	cpu_map__put(evlist->cpus);
	evlist->cpus = NULL;
	thread_map__put(evlist->threads);
	evlist->threads = NULL;

	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
/*
 * Make @evsel use the appropriate cpu and thread maps.
 *
 * An evsel that already has its own cpus (via PMU sysfs) keeps them unless
 * the user defined a target cpu list; otherwise it shares the evlist's cpus.
 * The thread map always comes from the evlist.
 */
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	bool use_evlist_cpus = !evsel->own_cpus || evlist->has_user_cpus;

	if (use_evlist_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

171
	evlist__for_each_entry(evlist, evsel)
172 173 174
		__perf_evlist__propagate_maps(evlist, evsel);
}

175 176
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
177
	entry->evlist = evlist;
178
	list_add_tail(&entry->node, &evlist->entries);
179
	entry->idx = evlist->nr_entries;
180
	entry->tracking = !entry->idx;
181

182 183
	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);
184 185

	__perf_evlist__propagate_maps(evlist, entry);
186 187
}

188 189 190 191 192 193 194
/*
 * Unlink @evsel from @evlist and decrement the entry count.  The evsel is
 * not deleted.
 */
void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries--;
}

195
void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
196
				   struct list_head *list)
197
{
198
	struct perf_evsel *evsel, *temp;
199

200
	__evlist__for_each_entry_safe(list, temp, evsel) {
201 202 203
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
204 205
}

206 207 208 209 210
void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
211 212 213
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;
214

215
	__evlist__for_each_entry(list, evsel) {
216
		evsel->leader = leader;
217 218 219 220
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
221
{
222 223
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
224
		__perf_evlist__set_leader(&evlist->entries);
225
	}
226 227
}

228
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
229 230 231 232 233 234 235 236 237 238 239 240 241
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

242
/*
 * Add a cycles event (created via perf_evsel__new_cycles()) to @evlist.
 * Returns 0 on success or -ENOMEM if the event could not be created.
 */
int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
	struct perf_evsel *cycles = perf_evsel__new_cycles(precise);

	if (!cycles)
		return -ENOMEM;

	perf_evlist__add(evlist, cycles);
	return 0;
}
252

253 254 255 256 257 258 259
int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
260
	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);
261 262 263 264 265 266 267 268

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

269 270
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
271 272 273 274 275 276
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
277
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
278 279 280 281 282
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

283
	perf_evlist__splice_list_tail(evlist, &head);
284 285 286 287

	return 0;

out_delete_partial_list:
288
	__evlist__for_each_entry_safe(&head, n, evsel)
289 290 291 292
		perf_evsel__delete(evsel);
	return -1;
}

293 294 295 296 297 298 299 300 301 302 303
/*
 * Run event_attr_init() on each of the @nr_attrs attributes, then add them
 * to @evlist.  Returns the result of perf_evlist__add_attrs().
 */
int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t idx = 0;

	while (idx < nr_attrs) {
		event_attr_init(&attrs[idx]);
		idx++;
	}

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

304 305
struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
306 307 308
{
	struct perf_evsel *evsel;

309
	evlist__for_each_entry(evlist, evsel) {
310 311 312 313 314 315 316 317
		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

318 319 320 321 322 323
struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

324
	evlist__for_each_entry(evlist, evsel) {
325 326 327 328 329 330 331 332
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

333 334 335
int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
336
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
337

338
	if (IS_ERR(evsel))
339 340
		return -1;

341
	evsel->handler = handler;
342 343 344 345
	perf_evlist__add(evlist, evsel);
	return 0;
}

346 347 348 349 350 351 352 353 354
/*
 * Number of thread slots used by @evsel: a system-wide event uses a single
 * slot, any other event uses one per entry of the evlist's thread map.
 */
static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	return evsel->system_wide ? 1 : thread_map__nr(evlist->threads);
}

355 356 357
void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;
358

359
	evlist__for_each_entry(evlist, pos) {
360 361 362
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
363
	}
364 365

	evlist->enabled = false;
366 367
}

368 369 370
void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;
371

372
	evlist__for_each_entry(evlist, pos) {
373 374 375
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
376
	}
377 378 379 380 381 382 383

	evlist->enabled = true;
}

/* Flip the evlist between the enabled and disabled states. */
void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	if (evlist->enabled)
		perf_evlist__disable(evlist);
	else
		perf_evlist__enable(evlist);
}

386 387 388
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
389
	int thread;
390 391 392 393 394 395
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
396
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
397 398 399 400 401 402 403 404 405 406
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
407
	int cpu;
408 409 410 411 412 413
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
414
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
		if (err)
			return err;
	}
	return 0;
}

/*
 * Enable @evsel for index @idx.  When the evlist's cpu map is empty @idx is
 * treated as a thread index, otherwise as a cpu index.
 */
int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	if (cpu_map__empty(evlist->cpus))
		return perf_evlist__enable_event_thread(evlist, evsel, idx);

	return perf_evlist__enable_event_cpu(evlist, evsel, idx);
}

432
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
433
{
434 435
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
436 437 438
	int nfds = 0;
	struct perf_evsel *evsel;

439
	evlist__for_each_entry(evlist, evsel) {
440 441 442 443 444 445
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

446 447
	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
448 449 450
		return -ENOMEM;

	return 0;
451
}
452

453 454
static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
455
{
456
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
457 458 459 460 461
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
462
		evlist->pollfd.priv[pos].ptr = map;
463 464 465 466 467 468 469

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

470
/*
 * Register @fd for POLLIN with no mmap attached to it.  Returns the pollfd
 * slot index or a negative value on failure.
 */
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	struct perf_mmap *no_map = NULL;

	return __perf_evlist__add_pollfd(evlist, fd, no_map, POLLIN);
}

475 476
/*
 * fdarray__filter() callback: drop the reference on the mmap (if any) that
 * was attached to pollfd slot @fd.
 */
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (!map)
		return;

	perf_mmap__put(map);
}
483

484 485
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
486
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
487
			       perf_evlist__munmap_filtered, NULL);
488 489
}

490 491
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
492
	return fdarray__poll(&evlist->pollfd, timeout);
493 494
}

495 496 497
/*
 * Fill in the sample-id slot of @evsel at (@cpu, @thread) with @id and link
 * it into the evlist's id hash table.
 */
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	int bucket;

	sid->evsel = evsel;
	sid->id = id;

	bucket = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[bucket]);
}

508 509 510 511 512 513 514
/*
 * Hash @id for @evsel at (@cpu, @thread) and append it to the evsel's id
 * array, bumping the evsel's id count.
 */
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);

	evsel->id[evsel->ids] = id;
	evsel->ids++;
}

J
Jiri Olsa 已提交
515 516 517
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
518 519
{
	u64 read_data[4] = { 0, };
520
	int id_idx = 1; /* The first entry is the counter value */
521 522 523 524 525 526 527 528 529 530 531
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */
532

533 534 535 536 537 538 539
	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

540 541 542 543 544 545 546 547 548
	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

549 550 551 552
	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
553 554 555
	return 0;
}

A
Adrian Hunter 已提交
556 557 558 559 560 561 562 563 564 565 566
/*
 * Record on the sample-id slot of @evsel at (@cpu, @thread) the mmap index
 * @idx together with the cpu number and tid the slot refers to.  cpu is -1
 * when there is no cpu map or @cpu is negative; tid is -1 for system-wide
 * events, when there is no thread map, or when @thread is negative.
 */
static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	bool has_cpu = evlist->cpus && cpu >= 0;
	bool has_tid = !evsel->system_wide && evlist->threads && thread >= 0;

	sid->idx = idx;
	sid->cpu = has_cpu ? evlist->cpus->map[cpu] : -1;
	sid->tid = has_tid ? thread_map__pid(evlist->threads, thread) : -1;
}

572
/*
 * Look up the sample-id entry registered for @id in the evlist's hash
 * table.  Returns NULL if the id is unknown.
 */
struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	int bucket = hash_64(id, PERF_EVLIST__HLIST_BITS);
	struct hlist_head *head = &evlist->heads[bucket];
	struct perf_sample_id *sid;

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid;
	}

	return NULL;
}

/*
 * Map an event id to its evsel.
 *
 * With a single entry, or an id of zero, the first evsel is returned.
 * Otherwise the id is looked up in the hash table; an unknown id resolves to
 * the first evsel unless sample_id_all is in use, in which case NULL is
 * returned.
 */
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (perf_evlist__sample_id_all(evlist))
		return NULL;

	return perf_evlist__first(evlist);
}
604

605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
/*
 * Map an event id to its evsel without any fallback: a zero or unknown id
 * yields NULL.
 */
struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	return sid ? sid->evsel : NULL;
}

620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
/*
 * Extract the event id carried by @event into @id.
 *
 * The payload after the header is treated as an array of u64.  For sample
 * records the id sits id_pos entries from the start; for every other record
 * it sits is_pos entries from the end.  Returns 0 on success or -1 when the
 * record is too short to hold the id.
 */
static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t nr_u64;

	/* Number of 8-byte words following the header. */
	nr_u64 = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= nr_u64)
			return -1;
		*id = array[evlist->id_pos];
		return 0;
	}

	if (evlist->is_pos > nr_u64)
		return -1;

	nr_u64 -= evlist->is_pos;
	*id = array[nr_u64];
	return 0;
}

J
Jiri Olsa 已提交
641 642
struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
					    union perf_event *event)
643
{
644
	struct perf_evsel *first = perf_evlist__first(evlist);
645 646 647 648 649 650
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
651 652 653 654 655
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;
656 657 658 659 660 661

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
662
		return first;
663 664 665 666 667 668 669 670 671 672 673

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

W
Wang Nan 已提交
674 675 676 677
/*
 * Pause (@value true) or resume (@value false) output on every backward
 * ring buffer that has a valid fd.  Does nothing when there are no backward
 * mmaps.  Returns 0 on success or the first non-zero ioctl() result.
 */
static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int idx;

	if (!evlist->backward_mmap)
		return 0;

	for (idx = 0; idx < evlist->nr_mmaps; idx++) {
		int fd = evlist->backward_mmap[idx].fd;

		if (fd >= 0) {
			int rc = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT,
				       value ? 1 : 0);

			if (rc)
				return rc;
		}
	}

	return 0;
}

694
/* Pause output on the evlist's backward ring buffers. */
static int perf_evlist__pause(struct perf_evlist *evlist)
{
	const bool paused = true;

	return perf_evlist__set_paused(evlist, paused);
}

699
/* Resume output on the evlist's backward ring buffers. */
static int perf_evlist__resume(struct perf_evlist *evlist)
{
	const bool paused = false;

	return perf_evlist__set_paused(evlist, paused);
}

704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728
/*
 * Read the next event from forward ring buffer @idx.
 *
 * The overwrite flag is passed down because a forward overwritable ring
 * buffer needs a mess-up check: the memory md->prev points at may have been
 * overwritten.  A read-write ring buffer does not need it, since the kernel
 * stops outputting when it hits md->prev (perf_mmap__consume()).
 */
union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *ring = &evlist->mmap[idx];

	return perf_mmap__read_forward(ring, evlist->overwrite);
}

/*
 * Read the next event from ring buffer @idx in the backward direction.
 *
 * No mess-up check is needed here: arbitrarily long data can always be read
 * from a backward ring buffer, unless it was not paused before reading.
 */
union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *ring = &evlist->mmap[idx];

	return perf_mmap__read_backward(ring);
}

729 730
/* Read the next event from ring buffer @idx; this is a forward read. */
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	union perf_event *event = perf_evlist__mmap_read_forward(evlist, idx);

	return event;
}

734 735 736 737 738 739 740 741
/* Run perf_mmap__read_catchup() on the evlist's ring buffer @idx. */
void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *ring = &evlist->mmap[idx];

	perf_mmap__read_catchup(ring);
}

/*
 * Mark the event last read from ring buffer @idx as consumed, passing the
 * evlist's overwrite setting through to perf_mmap__consume().
 */
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *ring = &evlist->mmap[idx];

	perf_mmap__consume(ring, evlist->overwrite);
}

744
static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
745
{
746
	int i;
747

748 749 750
	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
751

752 753 754
	if (evlist->backward_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->backward_mmap[i]);
755
}
756

757 758 759
void perf_evlist__munmap(struct perf_evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
760
	zfree(&evlist->mmap);
761
	zfree(&evlist->backward_mmap);
762 763
}

764
static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist)
765
{
W
Wang Nan 已提交
766
	int i;
767
	struct perf_mmap *map;
W
Wang Nan 已提交
768

769
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
770
	if (cpu_map__empty(evlist->cpus))
771
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
772 773 774
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;
775

776
	for (i = 0; i < evlist->nr_mmaps; i++) {
777
		map[i].fd = -1;
778 779 780 781 782 783 784 785 786 787 788
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_evlist__mmap_consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
789
	return map;
790 791
}

792 793 794 795
/* An event is polled unless it writes its ring buffer backward. */
static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	return !evsel->attr.write_backward;
}

801
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
802
				       struct mmap_params *mp, int cpu_idx,
803
				       int thread, int *_output, int *_output_backward)
804 805
{
	struct perf_evsel *evsel;
806
	int revent;
807
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
808

809
	evlist__for_each_entry(evlist, evsel) {
810 811
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
812
		int fd;
813
		int cpu;
814

815 816 817 818 819 820 821 822 823
		if (evsel->attr.write_backward) {
			output = _output_backward;
			maps = evlist->backward_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist);
				if (!maps)
					return -1;
				evlist->backward_mmap = maps;
824 825
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
826 827
			}
		}
828

829 830 831
		if (evsel->system_wide && thread)
			continue;

832 833 834 835
		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

836
		fd = FD(evsel, cpu, thread);
837 838 839

		if (*output == -1) {
			*output = fd;
840 841

			if (perf_mmap__mmap(&maps[idx], mp, *output)  < 0)
842 843 844 845
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;
846

847
			perf_mmap__get(&maps[idx]);
848 849
		}

850 851
		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

852 853 854 855 856 857 858 859
		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
860 861
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
862
			return -1;
863
		}
864

A
Adrian Hunter 已提交
865 866 867 868 869 870 871
		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
872 873 874 875 876
	}

	return 0;
}

877 878
static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
879
{
880
	int cpu, thread;
881 882
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
883

A
Adrian Hunter 已提交
884
	pr_debug2("perf event ring buffer mmapped per cpu\n");
885
	for (cpu = 0; cpu < nr_cpus; cpu++) {
886
		int output = -1;
887
		int output_backward = -1;
888

889 890 891
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

892
		for (thread = 0; thread < nr_threads; thread++) {
893
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
894
							thread, &output, &output_backward))
895
				goto out_unmap;
896 897 898 899 900 901
		}
	}

	return 0;

out_unmap:
902
	perf_evlist__munmap_nofree(evlist);
903 904 905
	return -1;
}

906 907
static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
908 909
{
	int thread;
910
	int nr_threads = thread_map__nr(evlist->threads);
911

A
Adrian Hunter 已提交
912
	pr_debug2("perf event ring buffer mmapped per thread\n");
913
	for (thread = 0; thread < nr_threads; thread++) {
914
		int output = -1;
915
		int output_backward = -1;
916

917 918 919
		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

920
		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
921
						&output, &output_backward))
922
			goto out_unmap;
923 924 925 926 927
	}

	return 0;

out_unmap:
928
	perf_evlist__munmap_nofree(evlist);
929 930 931
	return -1;
}

932
unsigned long perf_event_mlock_kb_in_pages(void)
933
{
934 935
	unsigned long pages;
	int max;
936

937 938 939 940 941 942 943 944 945 946
	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}
947

948 949 950 951 952 953 954
	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

955
size_t perf_evlist__mmap_size(unsigned long pages)
956 957 958 959
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
960 961 962 963 964
		return 0;

	return (pages + 1) * page_size;
}

965 966
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
967
{
968
	unsigned long pages, val;
969 970 971 972 973 974 975
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
976

977
	if (str == NULL)
978
		return -EINVAL;
979

980
	val = parse_tag_value(str, tags);
981
	if (val != (unsigned long) -1) {
982 983 984 985 986 987
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
988 989
		if (*eptr != '\0')
			return -EINVAL;
990 991
	}

992
	if (pages == 0 && min == 0) {
993
		/* leave number of pages at 0 */
994
	} else if (!is_power_of_2(pages)) {
995 996
		char buf[100];

997
		/* round pages up to next power of 2 */
998
		pages = roundup_pow_of_two(pages);
999 1000
		if (!pages)
			return -EINVAL;
1001 1002 1003 1004

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
1005 1006
	}

1007 1008 1009 1010 1011 1012
	if (pages > max)
		return -EINVAL;

	return pages;
}

1013
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1014 1015 1016 1017
{
	unsigned long max = UINT_MAX;
	long pages;

A
Adrian Hunter 已提交
1018
	if (max > SIZE_MAX / page_size)
1019 1020 1021 1022 1023
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
1024 1025 1026 1027 1028 1029 1030
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

1031 1032 1033 1034 1035 1036
/*
 * Option callback: parse @str as a mmap size into the variable that
 * opt->value points at.
 */
int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	unsigned int *mmap_pages = opt->value;

	return __perf_evlist__parse_mmap_pages(mmap_pages, str);
}

1037
/**
1038
 * perf_evlist__mmap_ex - Create mmaps to receive events.
1039 1040 1041
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
1042 1043
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
1044
 *
1045 1046 1047
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
1048
 *
1049 1050 1051
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
1052
 * Return: %0 on success, negative error code otherwise.
1053
 */
1054 1055 1056
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
1057
{
1058
	struct perf_evsel *evsel;
1059 1060
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
1061 1062 1063
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};
1064

1065 1066 1067
	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist);
	if (!evlist->mmap)
1068 1069
		return -ENOMEM;

1070
	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1071 1072 1073
		return -ENOMEM;

	evlist->overwrite = overwrite;
1074
	evlist->mmap_len = perf_evlist__mmap_size(pages);
1075
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
1076
	mp.mask = evlist->mmap_len - page_size - 1;
1077

1078 1079 1080
	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

1081
	evlist__for_each_entry(evlist, evsel) {
1082
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1083
		    evsel->sample_id == NULL &&
1084
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1085 1086 1087
			return -ENOMEM;
	}

1088
	if (cpu_map__empty(cpus))
1089
		return perf_evlist__mmap_per_thread(evlist, &mp);
1090

1091
	return perf_evlist__mmap_per_cpu(evlist, &mp);
1092
}
1093

1094 1095 1096 1097 1098 1099
/*
 * Create the event ring buffers without any auxtrace area: calls
 * perf_evlist__mmap_ex() with 0 auxtrace pages and auxtrace overwrite off.
 */
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	const unsigned int auxtrace_pages = 0;
	const bool auxtrace_overwrite = false;

	return perf_evlist__mmap_ex(evlist, pages, overwrite, auxtrace_pages,
				    auxtrace_overwrite);
}

1100
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1101
{
1102 1103
	struct cpu_map *cpus;
	struct thread_map *threads;
1104

1105
	threads = thread_map__new_str(target->pid, target->tid, target->uid);
1106

1107
	if (!threads)
1108 1109
		return -1;

1110
	if (target__uses_dummy_map(target))
1111
		cpus = cpu_map__dummy_new();
1112
	else
1113
		cpus = cpu_map__new(target->cpu_list);
1114

1115
	if (!cpus)
1116 1117
		goto out_delete_threads;

1118 1119
	evlist->has_user_cpus = !!target->cpu_list;

1120
	perf_evlist__set_maps(evlist, cpus, threads);
1121 1122

	return 0;
1123 1124

out_delete_threads:
1125
	thread_map__put(threads);
1126 1127 1128
	return -1;
}

1129 1130
void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
1131
{
1132 1133 1134 1135 1136 1137 1138 1139
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
1140
		cpu_map__put(evlist->cpus);
1141
		evlist->cpus = cpu_map__get(cpus);
1142
	}
1143

1144
	if (threads != evlist->threads) {
1145
		thread_map__put(evlist->threads);
1146
		evlist->threads = thread_map__get(threads);
1147
	}
1148

1149
	perf_evlist__propagate_maps(evlist);
1150 1151
}

1152 1153 1154 1155 1156
void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

1157
	evlist__for_each_entry(evlist, evsel)
1158 1159 1160 1161 1162 1163 1164 1165
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

1166
	evlist__for_each_entry(evlist, evsel)
1167 1168 1169
		__perf_evsel__reset_sample_bit(evsel, bit);
}

1170
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1171 1172
{
	struct perf_evsel *evsel;
1173
	int err = 0;
1174

1175
	evlist__for_each_entry(evlist, evsel) {
1176
		if (evsel->filter == NULL)
1177
			continue;
1178

1179 1180 1181 1182
		/*
		 * filters only work for tracepoint event, which doesn't have cpu limit.
		 * So evlist and evsel should always be same.
		 */
1183
		err = perf_evsel__apply_filter(evsel, evsel->filter);
1184 1185
		if (err) {
			*err_evsel = evsel;
1186
			break;
1187
		}
1188 1189
	}

1190 1191 1192 1193 1194 1195 1196 1197
	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

1198
	evlist__for_each_entry(evlist, evsel) {
1199 1200 1201
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

1202
		err = perf_evsel__set_filter(evsel, filter);
1203 1204 1205 1206 1207
		if (err)
			break;
	}

	return err;
1208
}
1209

1210
/*
 * Build a filter of the form
 *   "common_pid != P0 && common_pid != P1 && ..."
 * from the @npids entries of @pids and hand it to perf_evlist__set_filter().
 *
 * Returns the result of perf_evlist__set_filter(), or -1 if the filter
 * string could not be allocated.  With @npids == 0 there is nothing to
 * exclude: no filter is built or applied and 0 is returned.
 */
int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	/*
	 * Without this guard an empty list would pass an uninitialised
	 * pointer to perf_evlist__set_filter() and then to free().
	 */
	if (npids == 0)
		return 0;

	for (i = 0; i < npids; ++i) {
		char *tmp;

		if (i == 0) {
			if (asprintf(&tmp, "common_pid != %d", pids[i]) < 0)
				goto out_free;
		} else {
			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;
		}

		/* tmp now holds the extended expression; drop the old one */
		free(filter);
		filter = tmp;
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

1237 1238 1239 1240 1241
/*
 * Single-pid convenience wrapper: calls perf_evlist__set_filter_pids() with
 * a one-element array holding @pid and returns its result.
 */
int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

1242
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1243
{
1244
	struct perf_evsel *pos;
1245

1246 1247 1248 1249 1250 1251
	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

1252
	evlist__for_each_entry(evlist, pos) {
1253 1254
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
1255
			return false;
1256 1257
	}

1258
	return true;
1259 1260
}

1261
u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1262
{
1263 1264 1265 1266 1267
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

1268
	evlist__for_each_entry(evlist, evsel)
1269 1270 1271 1272 1273 1274 1275 1276 1277
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

/*
 * Recompute the combined sample type from scratch: clear the cached value
 * first so that __perf_evlist__combined_sample_type() walks the evsels again.
 */
u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	/* zero means "not cached" to the helper below */
	evlist->combined_sample_type = 0;

	return __perf_evlist__combined_sample_type(evlist);
}

1280 1281 1282 1283 1284
u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

1285
	evlist__for_each_entry(evlist, evsel)
1286 1287 1288 1289
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

1290 1291 1292 1293 1294 1295
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

1296
	evlist__for_each_entry(evlist, pos) {
1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

1316
u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1317
{
1318
	struct perf_evsel *first = perf_evlist__first(evlist);
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
1342 1343 1344

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
1345 1346 1347 1348
out:
	return size;
}

1349
bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1350
{
1351
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1352

1353
	evlist__for_each_entry_continue(evlist, pos) {
1354 1355
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
1356 1357
	}

1358 1359 1360
	return true;
}

1361
bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1362
{
1363
	struct perf_evsel *first = perf_evlist__first(evlist);
1364
	return first->attr.sample_id_all;
1365
}
1366 1367 1368 1369 1370 1371

/* Record @evsel as the selected entry of @evlist (evlist->selected). */
void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}
1372

1373 1374 1375 1376
void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

1377 1378
	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
1379 1380
}

1381 1382
/*
 * Install a cpu map built by cpu_map__new(NULL) and a dummy thread map in
 * @evlist.
 *
 * Returns 0 on success, -ENOMEM if either map could not be created.
 */
static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map	  *cpus;
	struct thread_map *threads;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		return -ENOMEM;

	threads = thread_map__new_dummy();
	if (!threads) {
		cpu_map__put(cpus);
		return -ENOMEM;
	}

	perf_evlist__set_maps(evlist, cpus, threads);

	/*
	 * perf_evlist__set_maps() stores cpu_map__get(cpus) and
	 * thread_map__get(threads), so the evlist has its own references;
	 * release the ones obtained at creation time.
	 */
	cpu_map__put(cpus);
	thread_map__put(threads);

	/* Success must be reported as 0: callers treat err < 0 as failure. */
	return 0;
}

1412
int perf_evlist__open(struct perf_evlist *evlist)
1413
{
1414
	struct perf_evsel *evsel;
1415
	int err;
1416

1417 1418 1419 1420 1421 1422 1423 1424 1425 1426
	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

1427 1428
	perf_evlist__update_id_pos(evlist);

1429
	evlist__for_each_entry(evlist, evsel) {
1430
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
1431 1432 1433 1434 1435 1436
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
1437
	perf_evlist__close(evlist);
1438
	errno = -err;
1439 1440
	return err;
}
1441

1442
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1443
				  const char *argv[], bool pipe_output,
1444
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
1466 1467
		int ret;

1468
		if (pipe_output)
1469 1470
			dup2(2, 1);

1471 1472
		signal(SIGTERM, SIG_DFL);

1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484
		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
1485 1486 1487 1488 1489 1490
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
1491
		 * For cancelling the workload without actually running it,
1492 1493 1494 1495 1496 1497 1498 1499 1500
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}
1501 1502 1503

		execvp(argv[0], (char **)argv);

1504
		if (exec_error) {
1505 1506 1507 1508 1509 1510 1511
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
1512 1513 1514
		exit(-1);
	}

1515 1516 1517 1518 1519 1520 1521 1522
	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

1523 1524 1525 1526 1527 1528
	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
1529
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1530
	}
1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

1542
	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

/*
 * Uncork the workload: write one byte to evlist->workload.cork_fd and close
 * it.
 *
 * Returns 0 when cork_fd is not greater than zero (nothing to start),
 * otherwise the return value of write(); a negative value is also reported
 * through perror().
 */
int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	char bf = 0;
	int ret;

	if (evlist->workload.cork_fd <= 0)
		return 0;

	/*
	 * Remove the cork, let it rip!
	 */
	ret = write(evlist->workload.cork_fd, &bf, 1);
	if (ret < 0)
		perror("unable to write to pipe");

	close(evlist->workload.cork_fd);
	return ret;
}
1574

1575
/*
 * Look up the evsel that @event belongs to and let it parse the event into
 * @sample.
 *
 * Returns -EFAULT when no evsel matches, otherwise the result of
 * perf_evsel__parse_sample().
 */
int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	return evsel ? perf_evsel__parse_sample(evsel, event, sample) : -EFAULT;
}
1584 1585 1586 1587 1588 1589

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

1590
	evlist__for_each_entry(evlist, evsel) {
1591 1592 1593 1594
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

1595
	return printed + fprintf(fp, "\n");
1596
}
1597

1598
int perf_evlist__strerror_open(struct perf_evlist *evlist,
1599 1600 1601
			       int err, char *buf, size_t size)
{
	int printed, value;
1602
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1603 1604 1605 1606 1607 1608 1609 1610

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

1611
		value = perf_event_paranoid();
1612 1613 1614 1615 1616 1617 1618 1619

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
1620
				     "For system wide tracing it needs to be set to -1.\n");
1621 1622

		printed += scnprintf(buf + printed, size - printed,
1623 1624
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
1625
		break;
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
1643
	default:
1644
out_default:
1645 1646 1647 1648 1649 1650
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}
1651

1652 1653
int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
1654
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1655
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
1656 1657 1658

	switch (err) {
	case EPERM:
1659
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1660 1661
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
1662
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1663
				     "Hint:\tTried using %zd kB.\n",
1664
				     emsg, pages_max_per_user, pages_attempted);
1665 1666 1667 1668 1669 1670 1671 1672 1673

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1674 1675 1676 1677 1678 1679 1680 1681 1682
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

1683 1684 1685 1686 1687 1688 1689 1690 1691
void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

1692
	evlist__for_each_entry_safe(evlist, n, evsel) {
1693 1694 1695 1696 1697 1698
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}
1699 1700 1701 1702 1703 1704 1705 1706 1707

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

1708
	evlist__for_each_entry(evlist, evsel) {
1709 1710 1711 1712 1713 1714
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}
1715 1716 1717 1718 1719 1720 1721

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

1722
	evlist__for_each_entry(evlist, evsel) {
1723 1724 1725 1726 1727 1728 1729 1730
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}
1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788

/*
 * Move the backward mmap state machine of @evlist to @state.
 *
 * Accepted transitions and the action taken after the state is stored:
 *   NOTREADY     -> RUNNING       (none)
 *   RUNNING      -> DATA_PENDING  (perf_evlist__pause)
 *   DATA_PENDING -> EMPTY         (none)
 *   EMPTY        -> RUNNING       (perf_evlist__resume)
 *
 * Any other request from one of these states is ignored.  Nothing is done
 * at all when evlist->backward_mmap is not set.
 */
void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->backward_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY:
		if (state != BKW_MMAP_RUNNING)
			return;
		break;
	case BKW_MMAP_RUNNING:
		if (state != BKW_MMAP_DATA_PENDING)
			return;
		action = PAUSE;
		break;
	case BKW_MMAP_DATA_PENDING:
		if (state != BKW_MMAP_EMPTY)
			return;
		break;
	case BKW_MMAP_EMPTY:
		if (state != BKW_MMAP_RUNNING)
			return;
		action = RESUME;
		break;
	default:
		/* unknown current state: warn, but still record the new one */
		WARN_ONCE(1, "Shouldn't get there\n");
		break;
	}

	evlist->bkw_mmap_state = state;

	if (action == PAUSE)
		perf_evlist__pause(evlist);
	else if (action == RESUME)
		perf_evlist__resume(evlist);
}
1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800

/*
 * Return true only if every evsel in @evlist has attr.exclude_kernel set
 * (which includes the case where the walk visits no evsel at all).
 */
bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->attr.exclude_kernel)
			continue;

		return false;
	}

	return true;
}