/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include "parse-options.h"

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use strdup() because free(evsel) assumes name is allocated */
	evsel->name = strdup("cycles");
	if (!evsel->name)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (evsel == NULL)
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}
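
/*
 * Illustrative only (the tracepoint and handler below are examples, not
 * taken from this file):
 *
 *	perf_evlist__add_newtp(evlist, "sched", "sched_switch",
 *			       process_sched_switch);
 *
 * stores the caller's handler in evsel->handler so it can be looked up
 * again when samples for that tracepoint are delivered.
 */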

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_DISABLE, 0);
		}
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_ENABLE, 0);
		}
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

int perf_evlist__disable_event(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return 0;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_DISABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

int perf_evlist__enable_event(struct perf_evlist *evlist,
			      struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_ENABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

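/*
 * The following helpers enable a single event on one mmap index: when the
 * evlist is mmapped per cpu the index is a cpu, otherwise it is a thread,
 * and perf_evlist__enable_event_idx() picks the right variant by checking
 * whether the cpu map is empty.
 */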
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
				  struct perf_evsel *evsel,
				  int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (evlist->overwrite) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the head, we got messed up.
		 *
		 * In either case, truncate and restart at head.
		 */
		int diff = head - old;
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * head points to a known good entry, start there.
			 */
			old = head;
		}
	}

	if (old != head) {
		size_t size;

		event = (union perf_event *)&data[old & md->mask];
		size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		old += size;
	}

	md->prev = old;

	return event;
}
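
/*
 * Sketch of the usual consumption loop (caller-side, not part of this
 * file): after perf_evlist__mmap(), each map index is drained with
 * perf_evlist__mmap_read() and the slot is released with
 * perf_evlist__mmap_consume(), roughly:
 *
 *	union perf_event *event;
 *	int i;
 *
 *	for (i = 0; i < evlist->nr_mmaps; i++) {
 *		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 *			... parse/deliver the event ...
 *			perf_evlist__mmap_consume(evlist, i);
 *		}
 *	}
 */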

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}

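/*
 * Weak fallbacks for the AUX area tracing hooks: when auxtrace support is
 * built in, the real implementations override these; otherwise they keep
 * the mmap paths below working as no-ops.
 */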
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		int fd;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX) {
		int max;

		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
			/*
			 * Pick a once upon a time good value, i.e. things look
			 * strange since we can't read a sysctl value, but let's not
			 * die yet...
			 */
			max = 512;
		} else {
			max -= (page_size / 1024);
		}

		pages = (max * 1024) / page_size;
		if (!is_power_of_2(pages))
			pages = rounddown_pow_of_two(pages);
	} else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
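
/*
 * Worked example (assuming 4 kB pages): with the 512 kB fallback above,
 * pages = 512 * 1024 / 4096 = 128, already a power of two, so the map size
 * becomes (128 + 1) * 4096 bytes, i.e. 128 data pages plus one extra page
 * for the ring-buffer header.
 */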

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}
1067 1068 1069 1070 1071 1072
int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}
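
/*
 * Example (values are illustrative only): profiling an existing pid on a
 * cpu subset would fill the target roughly like
 *
 *	struct target target = {
 *		.pid	  = "1234",
 *		.cpu_list = "0-3",
 *	};
 *
 * before calling perf_evlist__create_maps(evlist, &target).
 */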

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpus;
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = threads;
	}

	perf_evlist__propagate_maps(evlist);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have a cpu limit,
		 * so the evlist and evsel maps should always be the same.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	evlist->cpus = cpu_map__new(NULL);
	if (evlist->cpus == NULL)
		goto out;

	evlist->threads = thread_map__new_dummy();
	if (evlist->threads == NULL)
		goto out_free_cpus;

	err = 0;
out:
	return err;
out_free_cpus:
	cpu_map__put(evlist->cpus);
	evlist->cpus = NULL;
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
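
/*
 * Caller-side sketch (simplified, not taken from this file): the workload
 * is forked and corked before the events are set up, then released once
 * everything is ready, roughly:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	perf_evlist__open(evlist);
 *	perf_evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 *
 * Closing workload.cork_fd without writing cancels the child instead, as
 * described in the comment above.
 */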

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}