// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */

#include <byteswap.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdlib.h>
#include <perf/evsel.h>
#include "asm/bug.h"
#include "callchain.h"
#include "cgroup.h"
#include "counts.h"
#include "event.h"
#include "evsel.h"
#include "util/evsel_fprintf.h"
#include "evlist.h"
#include <perf/cpumap.h>
#include "thread_map.h"
#include "target.h"
#include "perf_regs.h"
#include "record.h"
#include "debug.h"
#include "trace-event.h"
#include "stat.h"
#include "string2.h"
#include "memswap.h"
#include "util.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
#include <internal/lib.h>

#include <linux/ctype.h>

struct perf_missing_features perf_missing_features;

static clockid_t clockid;

static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
	return 0;
}

void __weak test_attr__ready(void) { }

static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
{
}

static struct {
	size_t	size;
	int	(*init)(struct evsel *evsel);
	void	(*fini)(struct evsel *evsel);
} perf_evsel__object = {
	.size = sizeof(struct evsel),
	.init = perf_evsel__no_extra_init,
	.fini = perf_evsel__no_extra_fini,
};

int perf_evsel__object_config(size_t object_size,
			      int (*init)(struct evsel *evsel),
			      void (*fini)(struct evsel *evsel))
{

	if (object_size == 0)
		goto set_methods;

	if (perf_evsel__object.size > object_size)
		return -EINVAL;

	perf_evsel__object.size = object_size;

set_methods:
	if (init != NULL)
		perf_evsel__object.init = init;

	if (fini != NULL)
		perf_evsel__object.fini = fini;

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))

int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}
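/*
 * Example (illustrative): for sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME, three of the PERF_SAMPLE_MASK bits are set, so the fixed
 * part of each sample is 3 * sizeof(u64) == 24 bytes.
 */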

/**
 * __perf_evsel__calc_id_pos - calculate id_pos.
 * @sample_type: sample type
 *
 * This function returns the position of the event id (PERF_SAMPLE_ID or
 * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
 * perf_record_sample.
 */
static int __perf_evsel__calc_id_pos(u64 sample_type)
{
	int idx = 0;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 0;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_IP)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TID)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TIME)
		idx += 1;

	if (sample_type & PERF_SAMPLE_ADDR)
		idx += 1;

	return idx;
}
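/*
 * Example (illustrative): with sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME | PERF_SAMPLE_ID, the id follows the ip, tid and time
 * words, so __perf_evsel__calc_id_pos() returns 3.  With
 * PERF_SAMPLE_IDENTIFIER set, the id is always the first word and the
 * result is 0.
 */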

/**
 * __perf_evsel__calc_is_pos - calculate is_pos.
 * @sample_type: sample type
 *
 * This function returns the position (counting backwards) of the event id
 * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
 * sample_id_all is used there is an id sample appended to non-sample events.
 */
static int __perf_evsel__calc_is_pos(u64 sample_type)
{
	int idx = 1;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 1;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_CPU)
		idx += 1;

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		idx += 1;

	return idx;
}
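/*
 * Example (illustrative): for a non-sample event carrying sample_id_all data
 * with sample_type = PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID,
 * the id is the third u64 counting back from the end of the record, so
 * __perf_evsel__calc_is_pos() returns 3; with PERF_SAMPLE_IDENTIFIER it is
 * always the last u64 and the result is 1.
 */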

void perf_evsel__calc_id_pos(struct evsel *evsel)
{
	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
}

void __perf_evsel__set_sample_bit(struct evsel *evsel,
				  enum perf_event_sample_format bit)
{
	if (!(evsel->core.attr.sample_type & bit)) {
		evsel->core.attr.sample_type |= bit;
		evsel->sample_size += sizeof(u64);
		perf_evsel__calc_id_pos(evsel);
	}
}

void __perf_evsel__reset_sample_bit(struct evsel *evsel,
				    enum perf_event_sample_format bit)
{
	if (evsel->core.attr.sample_type & bit) {
		evsel->core.attr.sample_type &= ~bit;
		evsel->sample_size -= sizeof(u64);
		perf_evsel__calc_id_pos(evsel);
	}
}

void perf_evsel__set_sample_id(struct evsel *evsel,
			       bool can_sample_identifier)
{
	if (can_sample_identifier) {
		perf_evsel__reset_sample_bit(evsel, ID);
		perf_evsel__set_sample_bit(evsel, IDENTIFIER);
	} else {
		perf_evsel__set_sample_bit(evsel, ID);
	}
	evsel->core.attr.read_format |= PERF_FORMAT_ID;
}

/**
 * perf_evsel__is_function_event - Return whether given evsel is a function
 * trace event
 *
 * @evsel - evsel selector to be tested
 *
 * Return %true if event is function trace event
 */
bool perf_evsel__is_function_event(struct evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"

	return evsel->name &&
	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));

#undef FUNCTION_EVENT
}

void evsel__init(struct evsel *evsel,
		 struct perf_event_attr *attr, int idx)
{
	perf_evsel__init(&evsel->core, attr);
	evsel->idx	   = idx;
	evsel->tracking	   = !idx;
	evsel->leader	   = evsel;
	evsel->unit	   = "";
	evsel->scale	   = 1.0;
	evsel->max_events  = ULONG_MAX;
	evsel->evlist	   = NULL;
	evsel->bpf_obj	   = NULL;
	evsel->bpf_fd	   = -1;
	INIT_LIST_HEAD(&evsel->config_terms);
	perf_evsel__object.init(evsel);
	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
	perf_evsel__calc_id_pos(evsel);
	evsel->cmdline_group_boundary = false;
	evsel->metric_expr   = NULL;
	evsel->metric_name   = NULL;
	evsel->metric_events = NULL;
	evsel->collect_stat  = false;
	evsel->pmu_name      = NULL;
}

struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
{
	struct evsel *evsel = zalloc(perf_evsel__object.size);

	if (!evsel)
		return NULL;
	evsel__init(evsel, attr, idx);

	if (perf_evsel__is_bpf_output(evsel)) {
		evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		evsel->core.attr.sample_period = 1;
	}

	if (perf_evsel__is_clock(evsel)) {
		/*
		 * The evsel->unit points to static alias->unit
		 * so it's ok to use static string in here.
		 */
		static const char *unit = "msec";

		evsel->unit = unit;
		evsel->scale = 1e-6;
	}
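	/*
	 * Illustrative note: cpu-clock/task-clock counts are reported in
	 * nanoseconds, so with scale = 1e-6 a raw count of 2,000,000 is
	 * displayed as 2 msec once evsel->scale and evsel->unit are applied.
	 */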

	return evsel;
}

static bool perf_event_can_profile_kernel(void)
{
	return perf_event_paranoid_check(1);
}

struct evsel *perf_evsel__new_cycles(bool precise)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
		.exclude_kernel	= !perf_event_can_profile_kernel(),
	};
	struct evsel *evsel;

	event_attr_init(&attr);

	if (!precise)
		goto new_event;

	/*
	 * Now let the usual logic that sets up the perf_event_attr defaults
	 * kick in when we return and before perf_evsel__open() is called.
	 */
new_event:
	evsel = evsel__new(&attr);
	if (evsel == NULL)
		goto out;

	evsel->precise_max = true;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%s%s%.*s",
		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
		     attr.exclude_kernel ? "u" : "",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
		goto error_free;
out:
	return evsel;
error_free:
	evsel__delete(evsel);
	evsel = NULL;
	goto out;
}

/*
 * Returns pointer with encoded error via <linux/err.h> interface.
 */
struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
{
	struct evsel *evsel = zalloc(perf_evsel__object.size);
	int err = -ENOMEM;

	if (evsel == NULL) {
		goto out_err;
	} else {
		struct perf_event_attr attr = {
			.type	       = PERF_TYPE_TRACEPOINT,
			.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					  PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		};

		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
			goto out_free;

		evsel->tp_format = trace_event__tp_format(sys, name);
		if (IS_ERR(evsel->tp_format)) {
			err = PTR_ERR(evsel->tp_format);
			goto out_free;
		}

		event_attr_init(&attr);
		attr.config = evsel->tp_format->id;
		attr.sample_period = 1;
		evsel__init(evsel, &attr, idx);
	}

	return evsel;

out_free:
	zfree(&evsel->name);
	free(evsel);
out_err:
	return ERR_PTR(err);
}

const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
	"cycles",
	"instructions",
	"cache-references",
	"cache-misses",
	"branches",
	"branch-misses",
	"bus-cycles",
	"stalled-cycles-frontend",
	"stalled-cycles-backend",
	"ref-cycles",
};

static const char *__perf_evsel__hw_name(u64 config)
{
	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
		return perf_evsel__hw_names[config];

	return "unknown-hardware";
}

static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
{
	int colon = 0, r = 0;
	struct perf_event_attr *attr = &evsel->core.attr;
	bool exclude_guest_default = false;

#define MOD_PRINT(context, mod)	do {					\
		if (!attr->exclude_##context) {				\
			if (!colon) colon = ++r;			\
			r += scnprintf(bf + r, size - r, "%c", mod);	\
		} } while(0)

	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
		MOD_PRINT(kernel, 'k');
		MOD_PRINT(user, 'u');
		MOD_PRINT(hv, 'h');
		exclude_guest_default = true;
	}

	if (attr->precise_ip) {
		if (!colon)
			colon = ++r;
		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
		exclude_guest_default = true;
	}

	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
		MOD_PRINT(host, 'H');
		MOD_PRINT(guest, 'G');
	}
#undef MOD_PRINT
	if (colon)
		bf[colon - 1] = ':';
	return r;
}
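/*
 * Example (illustrative): an event that excludes kernel and hypervisor but
 * not user space gets ":u" appended, so a hardware cycles event is printed
 * as "cycles:u"; a precise_ip of 2 instead contributes ":pp".
 */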

static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
	"cpu-clock",
	"task-clock",
	"page-faults",
	"context-switches",
	"cpu-migrations",
	"minor-faults",
	"major-faults",
	"alignment-faults",
	"emulation-faults",
	"dummy",
};

static const char *__perf_evsel__sw_name(u64 config)
{
	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
		return perf_evsel__sw_names[config];
	return "unknown-software";
}

static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
{
	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
	int r;

	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

	if (type & HW_BREAKPOINT_R)
		r += scnprintf(bf + r, size - r, "r");

	if (type & HW_BREAKPOINT_W)
		r += scnprintf(bf + r, size - r, "w");

	if (type & HW_BREAKPOINT_X)
		r += scnprintf(bf + r, size - r, "x");

	return r;
}

static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
				[PERF_EVSEL__MAX_ALIASES] = {
 { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
 { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
 { "LLC",	"L2",							},
 { "dTLB",	"d-tlb",	"Data-TLB",				},
 { "iTLB",	"i-tlb",	"Instruction-TLB",			},
 { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
 { "node",								},
};

const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
				   [PERF_EVSEL__MAX_ALIASES] = {
 { "load",	"loads",	"read",					},
 { "store",	"stores",	"write",				},
 { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
};

const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
				       [PERF_EVSEL__MAX_ALIASES] = {
 { "refs",	"Reference",	"ops",		"access",		},
 { "misses",	"miss",							},
};

#define C(x)		PERF_COUNT_HW_CACHE_##x
#define CACHE_READ	(1 << C(OP_READ))
#define CACHE_WRITE	(1 << C(OP_WRITE))
#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
#define COP(x)		(1 << x)

/*
 * cache operation stat
 * L1I : Read and prefetch only
 * ITLB and BPU : Read-only
 */
static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
 [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 [C(ITLB)]	= (CACHE_READ),
 [C(BPU)]	= (CACHE_READ),
 [C(NODE)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};

bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
{
	if (perf_evsel__hw_cache_stat[type] & COP(op))
		return true;	/* valid */
	else
		return false;	/* invalid */
}

int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
					    char *bf, size_t size)
{
	if (result) {
		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
				 perf_evsel__hw_cache_op[op][0],
				 perf_evsel__hw_cache_result[result][0]);
	}

	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
			 perf_evsel__hw_cache_op[op][1]);
}

static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
	u8 op, result, type = (config >>  0) & 0xff;
	const char *err = "unknown-ext-hardware-cache-type";

	if (type >= PERF_COUNT_HW_CACHE_MAX)
		goto out_err;

	op = (config >>  8) & 0xff;
	err = "unknown-ext-hardware-cache-op";
	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
		goto out_err;

	result = (config >> 16) & 0xff;
	err = "unknown-ext-hardware-cache-result";
	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		goto out_err;

	err = "invalid-cache";
	if (!perf_evsel__is_cache_op_valid(type, op))
		goto out_err;

	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
	return scnprintf(bf, size, "%s", err);
}
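/*
 * Example (illustrative): the cache config encoding is type | (op << 8) |
 * (result << 16), so config == 0x10000 decodes to type L1D, op READ,
 * result MISS and is printed as "L1-dcache-load-misses".
 */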

static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
{
	int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
{
	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int perf_evsel__tool_name(char *bf, size_t size)
{
	int ret = scnprintf(bf, size, "duration_time");
	return ret;
}

const char *perf_evsel__name(struct evsel *evsel)
{
	char bf[128];

	if (!evsel)
		goto out_unknown;

	if (evsel->name)
		return evsel->name;

	switch (evsel->core.attr.type) {
	case PERF_TYPE_RAW:
		perf_evsel__raw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HARDWARE:
		perf_evsel__hw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HW_CACHE:
		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_SOFTWARE:
		if (evsel->tool_event)
			perf_evsel__tool_name(bf, sizeof(bf));
		else
			perf_evsel__sw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_TRACEPOINT:
		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
		break;

	case PERF_TYPE_BREAKPOINT:
		perf_evsel__bp_name(evsel, bf, sizeof(bf));
		break;

	default:
		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
			  evsel->core.attr.type);
		break;
	}

	evsel->name = strdup(bf);

	if (evsel->name)
		return evsel->name;
out_unknown:
	return "unknown";
}

const char *perf_evsel__group_name(struct evsel *evsel)
{
	return evsel->group_name ?: "anon group";
}

/*
 * Returns the group details for the specified leader,
 * with following rules.
 *
 *  For record -e '{cycles,instructions}'
 *    'anon group { cycles:u, instructions:u }'
 *
 *  For record -e 'cycles,instructions' and report --group
 *    'cycles:u, instructions:u'
 */
int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
{
	int ret = 0;
	struct evsel *pos;
	const char *group_name = perf_evsel__group_name(evsel);

	if (!evsel->forced_leader)
		ret = scnprintf(buf, size, "%s { ", group_name);

	ret += scnprintf(buf + ret, size - ret, "%s",
			 perf_evsel__name(evsel));

	for_each_group_member(pos, evsel)
		ret += scnprintf(buf + ret, size - ret, ", %s",
				 perf_evsel__name(pos));

	if (!evsel->forced_leader)
		ret += scnprintf(buf + ret, size - ret, " }");

	return ret;
}

static void __perf_evsel__config_callchain(struct evsel *evsel,
					   struct record_opts *opts,
					   struct callchain_param *param)
{
	bool function = perf_evsel__is_function_event(evsel);
	struct perf_event_attr *attr = &evsel->core.attr;

	perf_evsel__set_sample_bit(evsel, CALLCHAIN);

	attr->sample_max_stack = param->max_stack;

	if (opts->kernel_callchains)
		attr->exclude_callchain_user = 1;
	if (opts->user_callchains)
		attr->exclude_callchain_kernel = 1;
	if (param->record_mode == CALLCHAIN_LBR) {
		if (!opts->branch_stack) {
			if (attr->exclude_user) {
				pr_warning("LBR callstack option is only available "
					   "to get user callchain information. "
					   "Falling back to framepointers.\n");
			} else {
				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
							PERF_SAMPLE_BRANCH_CALL_STACK |
							PERF_SAMPLE_BRANCH_NO_CYCLES |
							PERF_SAMPLE_BRANCH_NO_FLAGS;
714 715 716 717 718 719
			}
		} else
			 pr_warning("Cannot use LBR callstack with branch stack. "
				    "Falling back to framepointers.\n");
	}

	if (param->record_mode == CALLCHAIN_DWARF) {
		if (!function) {
			perf_evsel__set_sample_bit(evsel, REGS_USER);
			perf_evsel__set_sample_bit(evsel, STACK_USER);
			if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
				attr->sample_regs_user |= DWARF_MINIMAL_REGS;
				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
			} else {
				attr->sample_regs_user |= PERF_REGS_MASK;
			}
			attr->sample_stack_user = param->dump_size;
			attr->exclude_callchain_user = 1;
		} else {
			pr_info("Cannot use DWARF unwind for function trace event,"
				" falling back to framepointers.\n");
		}
	}

	if (function) {
		pr_info("Disabling user space callchains for function trace event.\n");
		attr->exclude_callchain_user = 1;
	}
}

void perf_evsel__config_callchain(struct evsel *evsel,
				  struct record_opts *opts,
				  struct callchain_param *param)
{
	if (param->enabled)
		return __perf_evsel__config_callchain(evsel, opts, param);
}

static void
perf_evsel__reset_callgraph(struct evsel *evsel,
			    struct callchain_param *param)
{
	struct perf_event_attr *attr = &evsel->core.attr;

	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
	if (param->record_mode == CALLCHAIN_LBR) {
		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
					      PERF_SAMPLE_BRANCH_CALL_STACK);
	}
	if (param->record_mode == CALLCHAIN_DWARF) {
		perf_evsel__reset_sample_bit(evsel, REGS_USER);
		perf_evsel__reset_sample_bit(evsel, STACK_USER);
	}
}

static void apply_config_terms(struct evsel *evsel,
			       struct record_opts *opts, bool track)
{
	struct perf_evsel_config_term *term;
	struct list_head *config_terms = &evsel->config_terms;
	struct perf_event_attr *attr = &evsel->core.attr;
	/* callgraph default */
	struct callchain_param param = {
		.record_mode = callchain_param.record_mode,
	};
	u32 dump_size = 0;
	int max_stack = 0;
	const char *callgraph_buf = NULL;

	list_for_each_entry(term, config_terms, list) {
		switch (term->type) {
		case PERF_EVSEL__CONFIG_TERM_PERIOD:
			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
				attr->sample_period = term->val.period;
				attr->freq = 0;
				perf_evsel__reset_sample_bit(evsel, PERIOD);
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_FREQ:
			if (!(term->weak && opts->user_freq != UINT_MAX)) {
				attr->sample_freq = term->val.freq;
				attr->freq = 1;
				perf_evsel__set_sample_bit(evsel, PERIOD);
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_TIME:
			if (term->val.time)
				perf_evsel__set_sample_bit(evsel, TIME);
			else
				perf_evsel__reset_sample_bit(evsel, TIME);
			break;
		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
			callgraph_buf = term->val.callgraph;
			break;
		case PERF_EVSEL__CONFIG_TERM_BRANCH:
			if (term->val.branch && strcmp(term->val.branch, "no")) {
				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
				parse_branch_str(term->val.branch,
						 &attr->branch_sample_type);
			} else
				perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
			break;
		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
			dump_size = term->val.stack_user;
			break;
		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
			max_stack = term->val.max_stack;
			break;
		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
			evsel->max_events = term->val.max_events;
			break;
		case PERF_EVSEL__CONFIG_TERM_INHERIT:
			/*
			 * attr->inherit should have already been set by
			 * perf_evsel__config. If the user explicitly set
			 * inherit using config terms, override the global
			 * opt->no_inherit setting.
			 */
			attr->inherit = term->val.inherit ? 1 : 0;
			break;
		case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
			attr->write_backward = term->val.overwrite ? 1 : 0;
			break;
		case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
			break;
		case PERF_EVSEL__CONFIG_TERM_PERCORE:
			break;
		case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT:
			attr->aux_output = term->val.aux_output ? 1 : 0;
			break;
		default:
			break;
		}
	}

	/* User explicitly set per-event callgraph, clear the old setting and reset. */
	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
		bool sample_address = false;

		if (max_stack) {
			param.max_stack = max_stack;
			if (callgraph_buf == NULL)
				callgraph_buf = "fp";
		}

		/* parse callgraph parameters */
		if (callgraph_buf != NULL) {
			if (!strcmp(callgraph_buf, "no")) {
				param.enabled = false;
				param.record_mode = CALLCHAIN_NONE;
			} else {
				param.enabled = true;
				if (parse_callchain_record(callgraph_buf, &param)) {
					pr_err("per-event callgraph setting for %s failed. "
					       "Apply callgraph global setting for it\n",
					       evsel->name);
					return;
				}
				if (param.record_mode == CALLCHAIN_DWARF)
					sample_address = true;
			}
		}
		if (dump_size > 0) {
			dump_size = round_up(dump_size, sizeof(u64));
			param.dump_size = dump_size;
		}

		/* If global callgraph set, clear it */
		if (callchain_param.enabled)
			perf_evsel__reset_callgraph(evsel, &callchain_param);

		/* set perf-event callgraph */
		if (param.enabled) {
			if (sample_address) {
				perf_evsel__set_sample_bit(evsel, ADDR);
				perf_evsel__set_sample_bit(evsel, DATA_SRC);
				evsel->core.attr.mmap_data = track;
			}
			perf_evsel__config_callchain(evsel, opts, &param);
		}
	}
}
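/*
 * Example (illustrative): per-event terms such as
 *
 *	perf record -e cycles/period=100000,call-graph=dwarf/ ...
 *
 * end up in evsel->config_terms and are applied here, overriding the
 * corresponding global record options for that event only.
 */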

static bool is_dummy_event(struct evsel *evsel)
{
	return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
	       (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
}

/*
 * The enable_on_exec/disabled value strategy:
 *
 *  1) For any type of traced program:
 *    - all independent events and group leaders are disabled
 *    - all group members are enabled
 *
 *     Group members are ruled by group leaders. They need to
 *     be enabled, because the group scheduling relies on that.
 *
 *  2) For traced programs executed by perf:
 *     - all independent events and group leaders have
 *       enable_on_exec set
 *     - we don't specifically enable or disable any event during
 *       the record command
 *
 *     Independent events and group leaders are initially disabled
 *     and get enabled by exec. Group members are ruled by group
 *     leaders as stated in 1).
 *
 *  3) For traced programs attached by perf (pid/tid):
 *     - we specifically enable or disable all events during
 *       the record command
 *
 *     When attaching events to an already running traced process we
 *     enable/disable events specifically, as there's no
 *     initial traced exec call.
 */
void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
			struct callchain_param *callchain)
{
	struct evsel *leader = evsel->leader;
	struct perf_event_attr *attr = &evsel->core.attr;
	int track = evsel->tracking;
	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;

	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
	attr->inherit	    = !opts->no_inherit;
	attr->write_backward = opts->overwrite ? 1 : 0;

	perf_evsel__set_sample_bit(evsel, IP);
	perf_evsel__set_sample_bit(evsel, TID);

	if (evsel->sample_read) {
		perf_evsel__set_sample_bit(evsel, READ);

		/*
		 * We need ID even in case of single event, because
		 * PERF_SAMPLE_READ processes ID specific data.
		 */
		perf_evsel__set_sample_id(evsel, false);

		/*
		 * Apply group format only if we belong to a group
		 * with more than one member.
		 */
		if (leader->core.nr_members > 1) {
			attr->read_format |= PERF_FORMAT_GROUP;
			attr->inherit = 0;
		}
	}

	/*
	 * We default some events to have a default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
			perf_evsel__set_sample_bit(evsel, PERIOD);
			attr->freq		= 1;
			attr->sample_freq	= opts->freq;
		} else {
			attr->sample_period = opts->default_interval;
		}
	}

	/*
	 * Disable sampling for all group members other
	 * than leader in case leader 'leads' the sampling.
	 */
	if ((leader != evsel) && leader->sample_read) {
		attr->freq           = 0;
		attr->sample_freq    = 0;
		attr->sample_period  = 0;
		attr->write_backward = 0;

		/*
		 * We don't get a sample for slave events, we make them when
		 * delivering the group leader sample. Set the slave event to
		 * follow the master sample_type to ease up reporting.
		 */
		attr->sample_type = leader->core.attr.sample_type;
	}

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat) {
		evsel->core.attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING |
			PERF_FORMAT_ID;
		attr->inherit_stat = 1;
	}

	if (opts->sample_address) {
		perf_evsel__set_sample_bit(evsel, ADDR);
		attr->mmap_data = track;
	}

	/*
	 * We don't allow user space callchains for function trace
	 * events, due to issues with page faults while tracing the page
	 * fault handler and its overall trickiness.
	 */
	if (perf_evsel__is_function_event(evsel))
		evsel->core.attr.exclude_callchain_user = 1;

	if (callchain && callchain->enabled && !evsel->no_aux_samples)
		perf_evsel__config_callchain(evsel, opts, callchain);

	if (opts->sample_intr_regs) {
		attr->sample_regs_intr = opts->sample_intr_regs;
		perf_evsel__set_sample_bit(evsel, REGS_INTR);
	}

	if (opts->sample_user_regs) {
		attr->sample_regs_user |= opts->sample_user_regs;
		perf_evsel__set_sample_bit(evsel, REGS_USER);
	}

	if (target__has_cpu(&opts->target) || opts->sample_cpu)
		perf_evsel__set_sample_bit(evsel, CPU);

	/*
	 * When the user explicitly disabled time don't force it here.
	 */
	if (opts->sample_time &&
	    (!perf_missing_features.sample_id_all &&
	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
	     opts->sample_time_set)))
		perf_evsel__set_sample_bit(evsel, TIME);

	if (opts->raw_samples && !evsel->no_aux_samples) {
		perf_evsel__set_sample_bit(evsel, TIME);
		perf_evsel__set_sample_bit(evsel, RAW);
		perf_evsel__set_sample_bit(evsel, CPU);
	}

	if (opts->sample_address)
		perf_evsel__set_sample_bit(evsel, DATA_SRC);

	if (opts->sample_phys_addr)
		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);

	if (opts->no_buffering) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
	if (opts->branch_stack && !evsel->no_aux_samples) {
		perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type = opts->branch_stack;
	}

	if (opts->sample_weight)
		perf_evsel__set_sample_bit(evsel, WEIGHT);

	attr->task  = track;
	attr->mmap  = track;
	attr->mmap2 = track && !perf_missing_features.mmap2;
	attr->comm  = track;
	attr->ksymbol = track && !perf_missing_features.ksymbol;
	attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;

	if (opts->record_namespaces)
		attr->namespaces  = track;

	if (opts->record_switch_events)
		attr->context_switch = track;

	if (opts->sample_transaction)
		perf_evsel__set_sample_bit(evsel, TRANSACTION);

	if (opts->running_time) {
		evsel->core.attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/*
	 * XXX see the function comment above
	 *
	 * Disabling only independent events or group leaders,
	 * keeping group members enabled.
	 */
	if (perf_evsel__is_group_leader(evsel))
		attr->disabled = 1;

	/*
	 * Setting enable_on_exec for independent events and
	 * group leaders for traced programs executed by perf.
	 */
	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
		!opts->initial_delay)
		attr->enable_on_exec = 1;

	if (evsel->immediate) {
		attr->disabled = 0;
		attr->enable_on_exec = 0;
	}

	clockid = opts->clockid;
	if (opts->use_clockid) {
		attr->use_clockid = 1;
		attr->clockid = opts->clockid;
	}

	if (evsel->precise_max)
		attr->precise_ip = 3;

	if (opts->all_user) {
		attr->exclude_kernel = 1;
		attr->exclude_user   = 0;
	}

	if (opts->all_kernel) {
		attr->exclude_kernel = 0;
		attr->exclude_user   = 1;
	}

	if (evsel->core.own_cpus || evsel->unit)
		evsel->core.attr.read_format |= PERF_FORMAT_ID;

	/*
	 * Apply event specific term settings,
	 * they override any global configuration.
	 */
	apply_config_terms(evsel, opts, track);

	evsel->ignore_missing_thread = opts->ignore_missing_thread;

	/* The --period option takes the precedence. */
	if (opts->period_set) {
		if (opts->period)
			perf_evsel__set_sample_bit(evsel, PERIOD);
		else
			perf_evsel__reset_sample_bit(evsel, PERIOD);
	}

	/*
	 * For initial_delay, a dummy event is added implicitly.
	 * The software event will error out with -EOPNOTSUPP
	 * if the BRANCH_STACK bit is set.
	 */
	if (opts->initial_delay && is_dummy_event(evsel))
		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
}

int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
{
	char *new_filter = strdup(filter);

	if (new_filter != NULL) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}

static int perf_evsel__append_filter(struct evsel *evsel,
				     const char *fmt, const char *filter)
{
	char *new_filter;

	if (evsel->filter == NULL)
		return perf_evsel__set_filter(evsel, filter);

	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}

int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
}

int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
	return perf_evsel__append_filter(evsel, "%s,%s", filter);
}
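/*
 * Example (illustrative): appending "prev_comm == sh" to an existing
 * tracepoint filter "common_pid != 1" yields
 * "(common_pid != 1) && (prev_comm == sh)", while the address filter
 * variant simply joins the two strings with a comma.
 */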

int evsel__enable(struct evsel *evsel)
{
	int err = perf_evsel__enable(&evsel->core);

	if (!err)
		evsel->disabled = false;

	return err;
}

int evsel__disable(struct evsel *evsel)
{
	int err = perf_evsel__disable(&evsel->core);
	/*
	 * We mark it disabled here so that tools that disable an event can
	 * ignore events after they disable it. I.e. the ring buffer may have
	 * already a few more events queued up before the kernel got the stop
	 * request.
	 */
	if (!err)
		evsel->disabled = true;

	return err;
}

static void perf_evsel__free_config_terms(struct evsel *evsel)
{
	struct perf_evsel_config_term *term, *h;

	list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
		list_del_init(&term->list);
		free(term);
	}
}

void perf_evsel__exit(struct evsel *evsel)
{
	assert(list_empty(&evsel->core.node));
	assert(evsel->evlist == NULL);
	perf_evsel__free_counts(evsel);
	perf_evsel__free_fd(&evsel->core);
	perf_evsel__free_id(&evsel->core);
	perf_evsel__free_config_terms(evsel);
	cgroup__put(evsel->cgrp);
	perf_cpu_map__put(evsel->core.cpus);
	perf_cpu_map__put(evsel->core.own_cpus);
	perf_thread_map__put(evsel->core.threads);
	zfree(&evsel->group_name);
	zfree(&evsel->name);
	perf_evsel__object.fini(evsel);
}

void evsel__delete(struct evsel *evsel)
{
	perf_evsel__exit(evsel);
	free(evsel);
}

void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
				struct perf_counts_values *count)
{
	struct perf_counts_values tmp;

	if (!evsel->prev_raw_counts)
		return;

	if (cpu == -1) {
		tmp = evsel->prev_raw_counts->aggr;
		evsel->prev_raw_counts->aggr = *count;
	} else {
		tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
		*perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
	}

	count->val = count->val - tmp.val;
	count->ena = count->ena - tmp.ena;
	count->run = count->run - tmp.run;
}

void perf_counts_values__scale(struct perf_counts_values *count,
			       bool scale, s8 *pscaled)
{
	s8 scaled = 0;

	if (scale) {
		if (count->run == 0) {
			scaled = -1;
			count->val = 0;
		} else if (count->run < count->ena) {
			scaled = 1;
			count->val = (u64)((double) count->val * count->ena / count->run);
		}
	}

	if (pscaled)
		*pscaled = scaled;
}
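/*
 * Example (illustrative): with val = 1000, ena = 200 and run = 100 the event
 * was scheduled in for half of the enabled time, so the scaled value becomes
 * 1000 * 200 / 100 = 2000 and *pscaled is set to 1.
 */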

static int
perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
{
	struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);

	return perf_evsel__read(&evsel->core, cpu, thread, count);
}

static void
perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
		      u64 val, u64 ena, u64 run)
{
	struct perf_counts_values *count;

	count = perf_counts(counter->counts, cpu, thread);

	count->val    = val;
	count->ena    = ena;
	count->run    = run;

	perf_counts__set_loaded(counter->counts, cpu, thread, true);
}

static int
perf_evsel__process_group_data(struct evsel *leader,
			       int cpu, int thread, u64 *data)
{
	u64 read_format = leader->core.attr.read_format;
	struct sample_read_value *v;
	u64 nr, ena = 0, run = 0, i;

	nr = *data++;

	if (nr != (u64) leader->core.nr_members)
		return -EINVAL;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		ena = *data++;

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		run = *data++;

	v = (struct sample_read_value *) data;

	perf_evsel__set_count(leader, cpu, thread,
			      v[0].value, ena, run);

	for (i = 1; i < nr; i++) {
		struct evsel *counter;

		counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
		if (!counter)
			return -EINVAL;

		perf_evsel__set_count(counter, cpu, thread,
				      v[i].value, ena, run);
	}

	return 0;
}

static int
perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
{
	struct perf_stat_evsel *ps = leader->stats;
	u64 read_format = leader->core.attr.read_format;
	int size = perf_evsel__read_size(&leader->core);
	u64 *data = ps->group_data;

	if (!(read_format & PERF_FORMAT_ID))
		return -EINVAL;

	if (!perf_evsel__is_group_leader(leader))
		return -EINVAL;

	if (!data) {
		data = zalloc(size);
		if (!data)
			return -ENOMEM;

		ps->group_data = data;
	}

	if (FD(leader, cpu, thread) < 0)
		return -EINVAL;

	if (readn(FD(leader, cpu, thread), data, size) <= 0)
		return -errno;

	return perf_evsel__process_group_data(leader, cpu, thread, data);
}

int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
{
	u64 read_format = evsel->core.attr.read_format;

	if (read_format & PERF_FORMAT_GROUP)
		return perf_evsel__read_group(evsel, cpu, thread);
	else
		return perf_evsel__read_one(evsel, cpu, thread);
}

int __perf_evsel__read_on_cpu(struct evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
		return -errno;

	perf_evsel__compute_deltas(evsel, cpu, thread, &count);
	perf_counts_values__scale(&count, scale, NULL);
	*perf_counts(evsel->counts, cpu, thread) = count;
	return 0;
}

static int get_group_fd(struct evsel *evsel, int cpu, int thread)
{
	struct evsel *leader = evsel->leader;
	int fd;

	if (perf_evsel__is_group_leader(evsel))
		return -1;

	/*
	 * Leader must be already processed/open,
	 * if not it's a bug.
	 */
	BUG_ON(!leader->core.fd);

	fd = FD(leader, cpu, thread);
	BUG_ON(fd == -1);

	return fd;
}

static void perf_evsel__remove_fd(struct evsel *pos,
				  int nr_cpus, int nr_threads,
				  int thread_idx)
{
	for (int cpu = 0; cpu < nr_cpus; cpu++)
		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
}

static int update_fds(struct evsel *evsel,
		      int nr_cpus, int cpu_idx,
		      int nr_threads, int thread_idx)
{
	struct evsel *pos;

	if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
		return -EINVAL;

	evlist__for_each_entry(evsel->evlist, pos) {
		nr_cpus = pos != evsel ? nr_cpus : cpu_idx;

		perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);

		/*
		 * Since fds for next evsel has not been created,
		 * there is no need to iterate whole event list.
		 */
		if (pos == evsel)
			break;
	}
	return 0;
}

static bool ignore_missing_thread(struct evsel *evsel,
				  int nr_cpus, int cpu,
				  struct perf_thread_map *threads,
				  int thread, int err)
{
	pid_t ignore_pid = perf_thread_map__pid(threads, thread);

	if (!evsel->ignore_missing_thread)
		return false;

	/* The system wide setup does not work with threads. */
	if (evsel->core.system_wide)
		return false;

	/* The -ESRCH is perf event syscall errno for pid's not found. */
	if (err != -ESRCH)
		return false;

	/* If there's only one thread, let it fail. */
	if (threads->nr == 1)
		return false;

	/*
	 * We should remove fd for missing_thread first
	 * because thread_map__remove() will decrease threads->nr.
	 */
	if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
		return false;

	if (thread_map__remove(threads, thread))
		return false;

	pr_warning("WARNING: Ignored open failure for pid %d\n",
		   ignore_pid);
	return true;
}

static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
				void *priv __maybe_unused)
{
	return fprintf(fp, "  %-32s %s\n", name, val);
}

static void display_attr(struct perf_event_attr *attr)
{
	if (verbose >= 2) {
		fprintf(stderr, "%.60s\n", graph_dotted_line);
		fprintf(stderr, "perf_event_attr:\n");
		perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
		fprintf(stderr, "%.60s\n", graph_dotted_line);
	}
}

static int perf_event_open(struct evsel *evsel,
			   pid_t pid, int cpu, int group_fd,
			   unsigned long flags)
{
	int precise_ip = evsel->core.attr.precise_ip;
	int fd;

	while (1) {
		pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
			  pid, cpu, group_fd, flags);

		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags);
		if (fd >= 0)
			break;

		/* Do not try less precise if not requested. */
		if (!evsel->precise_max)
			break;

		/*
		 * We tried all the precise_ip values, and it's
		 * still failing, so leave it to standard fallback.
		 */
		if (!evsel->core.attr.precise_ip) {
			evsel->core.attr.precise_ip = precise_ip;
			break;
		}

		pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
		evsel->core.attr.precise_ip--;
		pr_debug2("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
		display_attr(&evsel->core.attr);
	}

	return fd;
}
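/*
 * Example (illustrative): with precise_max set, the open is first tried with
 * precise_ip = 3; if the kernel only supports precise_ip = 2 that attempt
 * fails and the loop above retries with 2, which then succeeds.
 */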

int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
		struct perf_thread_map *threads)
{
	int cpu, thread, nthreads;
	unsigned long flags = PERF_FLAG_FD_CLOEXEC;
	int pid = -1, err;
	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;

	if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
	    (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
		return -EINVAL;

	if (cpus == NULL) {
		static struct perf_cpu_map *empty_cpu_map;

		if (empty_cpu_map == NULL) {
			empty_cpu_map = perf_cpu_map__dummy_new();
			if (empty_cpu_map == NULL)
				return -ENOMEM;
		}

		cpus = empty_cpu_map;
	}

	if (threads == NULL) {
		static struct perf_thread_map *empty_thread_map;

		if (empty_thread_map == NULL) {
			empty_thread_map = thread_map__new_by_tid(-1);
			if (empty_thread_map == NULL)
				return -ENOMEM;
		}

		threads = empty_thread_map;
	}

	if (evsel->core.system_wide)
		nthreads = 1;
	else
		nthreads = threads->nr;

	if (evsel->core.fd == NULL &&
	    perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags |= PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

fallback_missing_features:
	if (perf_missing_features.clockid_wrong)
		evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
	if (perf_missing_features.clockid) {
		evsel->core.attr.use_clockid = 0;
		evsel->core.attr.clockid = 0;
	}
	if (perf_missing_features.cloexec)
		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
	if (perf_missing_features.mmap2)
		evsel->core.attr.mmap2 = 0;
	if (perf_missing_features.exclude_guest)
		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
	if (perf_missing_features.lbr_flags)
		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
				     PERF_SAMPLE_BRANCH_NO_CYCLES);
	if (perf_missing_features.group_read && evsel->core.attr.inherit)
		evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
	if (perf_missing_features.ksymbol)
		evsel->core.attr.ksymbol = 0;
	if (perf_missing_features.bpf)
		evsel->core.attr.bpf_event = 0;
retry_sample_id:
	if (perf_missing_features.sample_id_all)
		evsel->core.attr.sample_id_all = 0;

	display_attr(&evsel->core.attr);

	for (cpu = 0; cpu < cpus->nr; cpu++) {

		for (thread = 0; thread < nthreads; thread++) {
			int fd, group_fd;

			if (!evsel->cgrp && !evsel->core.system_wide)
				pid = perf_thread_map__pid(threads, thread);

			group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
			test_attr__ready();

			fd = perf_event_open(evsel, pid, cpus->map[cpu],
					     group_fd, flags);

			FD(evsel, cpu, thread) = fd;

			if (fd < 0) {
				err = -errno;

				if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
					/*
					 * We just removed 1 thread, so take a step
					 * back on thread index and lower the upper
					 * nthreads limit.
					 */
					nthreads--;
					thread--;

					/* ... and pretend like nothing have happened. */
					err = 0;
					continue;
				}

				pr_debug2("\nsys_perf_event_open failed, error %d\n",
					  err);
				goto try_fallback;
			}

			pr_debug2(" = %d\n", fd);

			if (evsel->bpf_fd >= 0) {
				int evt_fd = fd;
				int bpf_fd = evsel->bpf_fd;

				err = ioctl(evt_fd,
					    PERF_EVENT_IOC_SET_BPF,
					    bpf_fd);
				if (err && errno != EEXIST) {
					pr_err("failed to attach bpf fd %d: %s\n",
					       bpf_fd, strerror(errno));
					err = -EINVAL;
					goto out_close;
				}
			}

			set_rlimit = NO_CHANGE;

			/*
			 * If we succeeded but had to kill clockid, fail and
			 * have perf_evsel__open_strerror() print us a nice
			 * error.
			 */
			if (perf_missing_features.clockid ||
			    perf_missing_features.clockid_wrong) {
				err = -EINVAL;
				goto out_close;
			}
		}
	}

	return 0;

try_fallback:
	/*
	 * perf stat needs between 5 and 22 fds per CPU. When we run out
	 * of them try to increase the limits.
	 */
	if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
		struct rlimit l;
		int old_errno = errno;

		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
			if (set_rlimit == NO_CHANGE)
				l.rlim_cur = l.rlim_max;
			else {
				l.rlim_cur = l.rlim_max + 1000;
				l.rlim_max = l.rlim_cur;
			}
			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
				set_rlimit++;
				errno = old_errno;
				goto retry_open;
			}
		}
		errno = old_errno;
	}

	if (err != -EINVAL || cpu > 0 || thread > 0)
		goto out_close;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface.
	 */
	if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
		perf_missing_features.aux_output = true;
		pr_debug2("Kernel has no attr.aux_output support, bailing out\n");
		goto out_close;
	} else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
		perf_missing_features.bpf = true;
		pr_debug2("switching off bpf_event\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
		perf_missing_features.ksymbol = true;
		pr_debug2("switching off ksymbol\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
		perf_missing_features.write_backward = true;
		pr_debug2("switching off write_backward\n");
		goto out_close;
	} else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
		perf_missing_features.clockid_wrong = true;
		pr_debug2("switching off clockid\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
		perf_missing_features.clockid = true;
		pr_debug2("switching off use_clockid\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
		perf_missing_features.cloexec = true;
		pr_debug2("switching off cloexec flag\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
		perf_missing_features.mmap2 = true;
		pr_debug2("switching off mmap2\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.exclude_guest &&
		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
		perf_missing_features.exclude_guest = true;
		pr_debug2("switching off exclude_guest, exclude_host\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.sample_id_all) {
		perf_missing_features.sample_id_all = true;
		pr_debug2("switching off sample_id_all\n");
		goto retry_sample_id;
	} else if (!perf_missing_features.lbr_flags &&
			(evsel->core.attr.branch_sample_type &
			 (PERF_SAMPLE_BRANCH_NO_CYCLES |
			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
		perf_missing_features.lbr_flags = true;
		pr_debug2("switching off branch sample type no (cycles/flags)\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.group_read &&
		    evsel->core.attr.inherit &&
		   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
		   perf_evsel__is_group_leader(evsel)) {
		perf_missing_features.group_read = true;
		pr_debug2("switching off group read\n");
		goto fallback_missing_features;
	}
out_close:
	if (err)
		threads->err_thread = thread;

	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = nthreads;
	} while (--cpu >= 0);
	return err;
}

void evsel__close(struct evsel *evsel)
{
	perf_evsel__close(&evsel->core);
	perf_evsel__free_id(&evsel->core);
}

int perf_evsel__open_per_cpu(struct evsel *evsel,
			     struct perf_cpu_map *cpus)
{
	return evsel__open(evsel, cpus, NULL);
}

int perf_evsel__open_per_thread(struct evsel *evsel,
				struct perf_thread_map *threads)
{
	return evsel__open(evsel, NULL, threads);
}

static int perf_evsel__parse_id_sample(const struct evsel *evsel,
				       const union perf_event *event,
				       struct perf_sample *sample)
{
	u64 type = evsel->core.attr.sample_type;
	const __u64 *array = event->sample.array;
	bool swapped = evsel->needs_swap;
	union u64_swap u;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		sample->id = *array;
		array--;
	}

1858
	if (type & PERF_SAMPLE_CPU) {
1859 1860 1861 1862 1863 1864 1865 1866
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		sample->cpu = u.val32[0];
1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		sample->pid = u.val32[0];
		sample->tid = u.val32[1];
1896
		array--;
1897 1898 1899 1900 1901
	}

	return 0;
}

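/*
 * Bounds checking helpers for sample parsing: 'endp' points one past the end
 * of the event, so a field of 'size' bytes at 'offset' is only accepted when
 * it neither exceeds the event's max_size nor runs past 'endp'.
 */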
static inline bool overflow(const void *endp, u16 max_size, const void *offset,
			    u64 size)
{
	return size > max_size || offset + size > endp;
}

#define OVERFLOW_CHECK(offset, size, max_size)				\
	do {								\
		if (overflow(endp, (max_size), (offset), (size)))	\
			return -EFAULT;					\
	} while (0)

#define OVERFLOW_CHECK_u64(offset) \
	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))

static int
perf_event__check_size(union perf_event *event, unsigned int sample_size)
{
	/*
	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
	 * check the format does not go past the end of the event.
	 */
	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	return 0;
}

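/*
 * Decode a PERF_RECORD_SAMPLE (or the sample_id_all trailer of another record
 * type) into @data, following the field order implied by the evsel's
 * sample_type.  Variable sized areas (read group, callchain, raw data, branch
 * stack, regs, user stack) are bounds checked with OVERFLOW_CHECK and returned
 * as pointers into @event itself, so @data is only valid while @event is.
 */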
int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
			     struct perf_sample *data)
{
	u64 type = evsel->core.attr.sample_type;
	bool swapped = evsel->needs_swap;
	const __u64 *array;
	u16 max_size = event->header.size;
	const void *endp = (void *)event + max_size;
	u64 sz;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = evsel->core.attr.sample_period;
	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	data->misc    = event->header.misc;
	data->id = -1ULL;
	data->data_src = PERF_MEM_DATA_SRC_NONE;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!evsel->core.attr.sample_id_all)
			return 0;
		return perf_evsel__parse_id_sample(evsel, event, data);
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_IP) {
		data->ip = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		u64 read_format = evsel->core.attr.read_format;

		OVERFLOW_CHECK_u64(array);
		if (read_format & PERF_FORMAT_GROUP)
			data->read.group.nr = *array;
		else
			data->read.one.value = *array;

		array++;

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_enabled = *array;
			array++;
		}

		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_running = *array;
			array++;
		}

		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			const u64 max_group_nr = UINT64_MAX /
					sizeof(struct sample_read_value);

			if (data->read.group.nr > max_group_nr)
				return -EFAULT;
			sz = data->read.group.nr *
			     sizeof(struct sample_read_value);
			OVERFLOW_CHECK(array, sz, max_size);
			data->read.group.values =
					(struct sample_read_value *)array;
			array = (void *)array + sz;
		} else {
			OVERFLOW_CHECK_u64(array);
			data->read.one.id = *array;
			array++;
		}
	}

	if (evsel__has_callchain(evsel)) {
		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);

		OVERFLOW_CHECK_u64(array);
		data->callchain = (struct ip_callchain *)array++;
		if (data->callchain->nr > max_callchain_nr)
			return -EFAULT;
		sz = data->callchain->nr * sizeof(u64);
		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		OVERFLOW_CHECK_u64(array);
		u.val64 = *array;

		/*
		 * Undo swap of u64, then swap on individual u32s,
		 * get the size of the raw area and undo all of the
		 * swap. The pevent interface handles endianity by
		 * itself.
		 */
		if (swapped) {
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}
		data->raw_size = u.val32[0];

		/*
		 * The raw data is aligned on 64bits including the
		 * u32 size, so it's safe to use mem_bswap_64.
		 */
		if (swapped)
			mem_bswap_64((void *) array, data->raw_size);

		array = (void *)array + sizeof(u32);

		OVERFLOW_CHECK(array, data->raw_size, max_size);
		data->raw_data = (void *)array;
		array = (void *)array + data->raw_size;
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		const u64 max_branch_nr = UINT64_MAX /
					  sizeof(struct branch_entry);

		OVERFLOW_CHECK_u64(array);
		data->branch_stack = (struct branch_stack *)array++;

		if (data->branch_stack->nr > max_branch_nr)
			return -EFAULT;
		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		OVERFLOW_CHECK_u64(array);
		data->user_regs.abi = *array;
		array++;

		if (data->user_regs.abi) {
			u64 mask = evsel->core.attr.sample_regs_user;

			sz = hweight64(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_regs.mask = mask;
			data->user_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		OVERFLOW_CHECK_u64(array);
		sz = *array++;

		data->user_stack.offset = ((char *)(array - 1)
					  - (char *) event);

		if (!sz) {
			data->user_stack.size = 0;
		} else {
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_stack.data = (char *)array;
			array = (void *)array + sz;
			OVERFLOW_CHECK_u64(array);
			data->user_stack.size = *array++;
			if (WARN_ONCE(data->user_stack.size > sz,
				      "user stack dump failure\n"))
				return -EFAULT;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT) {
		OVERFLOW_CHECK_u64(array);
		data->weight = *array;
		array++;
	}

	if (type & PERF_SAMPLE_DATA_SRC) {
		OVERFLOW_CHECK_u64(array);
		data->data_src = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TRANSACTION) {
		OVERFLOW_CHECK_u64(array);
		data->transaction = *array;
		array++;
	}

	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
	if (type & PERF_SAMPLE_REGS_INTR) {
		OVERFLOW_CHECK_u64(array);
		data->intr_regs.abi = *array;
		array++;

		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
			u64 mask = evsel->core.attr.sample_regs_intr;

			sz = hweight64(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->intr_regs.mask = mask;
			data->intr_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	data->phys_addr = 0;
	if (type & PERF_SAMPLE_PHYS_ADDR) {
		data->phys_addr = *array;
		array++;
	}

	return 0;
}

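/*
 * Extract only the timestamp from an event, without decoding the rest of the
 * sample: returns -1 if this evsel does not sample PERF_SAMPLE_TIME or the
 * event carries no sample_id_all trailer, and -EFAULT if the event is too
 * small for its declared sample format.
 */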
int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
				       union perf_event *event,
				       u64 *timestamp)
{
	u64 type = evsel->core.attr.sample_type;
	const __u64 *array;

	if (!(type & PERF_SAMPLE_TIME))
		return -1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		struct perf_sample data = {
			.time = -1ULL,
		};

		if (!evsel->core.attr.sample_id_all)
			return -1;
		if (perf_evsel__parse_id_sample(evsel, event, &data))
			return -1;

		*timestamp = data.time;
		return 0;
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER)
		array++;

	if (type & PERF_SAMPLE_IP)
		array++;

	if (type & PERF_SAMPLE_TID)
		array++;

	if (type & PERF_SAMPLE_TIME)
		*timestamp = *array;

	return 0;
}

struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
{
	return tep_find_field(evsel->tp_format, name);
}

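/*
 * Return a pointer into the raw tracepoint payload for the field @name,
 * resolving the offset/length word used by dynamic (variable sized) fields.
 * Returns NULL when the tracepoint format has no such field.
 *
 * Hypothetical usage from a tracepoint handler (the field name depends on the
 * event's format file):
 *
 *	const char *comm = perf_evsel__rawptr(evsel, sample, "comm");
 */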
void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
			 const char *name)
{
	struct tep_format_field *field = perf_evsel__field(evsel, name);
	int offset;

	if (!field)
		return NULL;

	offset = field->offset;

	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
		offset = *(int *)(sample->raw_data + field->offset);
		offset &= 0xffff;
	}

	return sample->raw_data + offset;
}

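/*
 * Read an integer tracepoint field of 1, 2, 4 or 8 bytes out of the raw
 * sample payload, byte swapping it when the recorded data has the opposite
 * endianness.  Unhandled sizes read as 0.
 */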
u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
			 bool needs_swap)
{
	u64 value;
	void *ptr = sample->raw_data + field->offset;

	switch (field->size) {
	case 1:
		return *(u8 *)ptr;
	case 2:
		value = *(u16 *)ptr;
		break;
	case 4:
		value = *(u32 *)ptr;
		break;
	case 8:
		memcpy(&value, ptr, sizeof(u64));
		break;
	default:
		return 0;
	}

	if (!needs_swap)
		return value;

	switch (field->size) {
	case 2:
		return bswap_16(value);
	case 4:
		return bswap_32(value);
	case 8:
		return bswap_64(value);
	default:
		return 0;
	}

	return 0;
}

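/*
 * Convenience wrapper around perf_evsel__field() + format_field__intval();
 * a missing field reads as 0.  Hypothetical usage, assuming the tracepoint
 * defines such a field:
 *
 *	int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
 */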
u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
		       const char *name)
{
	struct tep_format_field *field = perf_evsel__field(evsel, name);

	if (!field)
		return 0;

	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}

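/*
 * Degrade the event configuration after a failed sys_perf_event_open():
 * hardware cycles fall back to the cpu-clock software event, and EACCES with
 * perf_event_paranoid > 1 falls back to excluding kernel and hypervisor
 * samples (a 'u' modifier is appended to the event name).  Returns true when
 * the caller should retry the open with the modified attr.
 */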
bool perf_evsel__fallback(struct evsel *evsel, int err,
			  char *msg, size_t msgsize)
{
	int paranoid;

	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
		/*
		 * If it's cycles then fall back to hrtimer based
		 * cpu-clock-tick sw counter, which is always available even if
		 * no PMU support.
		 *
		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
		 * b0a873e).
		 */
		scnprintf(msg, msgsize, "%s",
"The cycles event is not supported, trying to fall back to cpu-clock-ticks");

		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
		evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;

		zfree(&evsel->name);
		return true;
	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
		   (paranoid = perf_event_paranoid()) > 1) {
		const char *name = perf_evsel__name(evsel);
		char *new_name;
		const char *sep = ":";

		/* Is the separator already in the name? */
		if (strchr(name, '/') ||
		    strchr(name, ':'))
			sep = "";

		if (asprintf(&new_name, "%s%su", name, sep) < 0)
			return false;

		if (evsel->name)
			free(evsel->name);
		evsel->name = new_name;
		scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
			  "to fall back to excluding kernel and hypervisor "
			  "samples", paranoid);
		evsel->core.attr.exclude_kernel = 1;
		evsel->core.attr.exclude_hv     = 1;

		return true;
	}

	return false;
}

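/*
 * Scan /proc/<pid>/comm for a process whose name matches @name; used below to
 * detect a running oprofile daemon when the PMU is busy.
 */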
static bool find_process(const char *name)
{
	size_t len = strlen(name);
	DIR *dir;
	struct dirent *d;
	int ret = -1;

	dir = opendir(procfs__mountpoint());
	if (!dir)
		return false;

	/* Walk through the directory. */
	while (ret && (d = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		char *data;
		size_t size;

		if ((d->d_type != DT_DIR) ||
		     !strcmp(".", d->d_name) ||
		     !strcmp("..", d->d_name))
			continue;

		scnprintf(path, sizeof(path), "%s/%s/comm",
			  procfs__mountpoint(), d->d_name);

		if (filename__read_str(path, &data, &size))
			continue;

		ret = strncmp(name, data, len);
		free(data);
	}

	closedir(dir);
	return ret ? false : true;
}

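/*
 * Turn the errno returned by sys_perf_event_open() into a human readable
 * message in @msg, adding hints that depend on the evsel's attributes and on
 * the requested target.
 */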
int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
			      int err, char *msg, size_t size)
{
	char sbuf[STRERR_BUFSIZE];
	int printed = 0;

	switch (err) {
	case EPERM:
	case EACCES:
		if (err == EPERM)
			printed = scnprintf(msg, size,
				"No permission to enable %s event.\n\n",
				perf_evsel__name(evsel));

		return scnprintf(msg + printed, size - printed,
		 "You may not have permission to collect %sstats.\n\n"
		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
		 "which controls use of the performance events system by\n"
		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
		 "The current value is %d:\n\n"
		 "  -1: Allow use of (almost) all events by all users\n"
		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
		 ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
		 "      Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
		 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
		 "	kernel.perf_event_paranoid = -1\n" ,
				 target->system_wide ? "system-wide " : "",
				 perf_event_paranoid());
	case ENOENT:
		return scnprintf(msg, size, "The %s event is not supported.",
				 perf_evsel__name(evsel));
	case EMFILE:
		return scnprintf(msg, size, "%s",
			 "Too many events are opened.\n"
			 "Probably the maximum number of open file descriptors has been reached.\n"
			 "Hint: Try again after reducing the number of events.\n"
			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
	case ENOMEM:
		if (evsel__has_callchain(evsel) &&
		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
			return scnprintf(msg, size,
					 "Not enough memory to setup event with callchain.\n"
					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
					 "Hint: Current value: %d", sysctl__max_stack());
		break;
	case ENODEV:
		if (target->cpu_list)
			return scnprintf(msg, size, "%s",
	 "No such device - did you specify an out-of-range profile CPU?");
		break;
	case EOPNOTSUPP:
		if (evsel->core.attr.sample_period != 0)
			return scnprintf(msg, size,
	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
					 perf_evsel__name(evsel));
		if (evsel->core.attr.precise_ip)
			return scnprintf(msg, size, "%s",
	"\'precise\' request may not be supported. Try removing 'p' modifier.");
#if defined(__i386__) || defined(__x86_64__)
		if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
			return scnprintf(msg, size, "%s",
	"No hardware sampling interrupt available.\n");
#endif
		break;
	case EBUSY:
		if (find_process("oprofiled"))
			return scnprintf(msg, size,
	"The PMU counters are busy/taken by another profiler.\n"
	"We found oprofile daemon running, please stop it and try again.");
		break;
	case EINVAL:
		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
		if (perf_missing_features.clockid)
			return scnprintf(msg, size, "clockid feature not supported.");
		if (perf_missing_features.clockid_wrong)
			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
		if (perf_missing_features.aux_output)
			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
		break;
	default:
		break;
	}

	return scnprintf(msg, size,
	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
	"/bin/dmesg | grep -i perf may provide additional information.\n",
			 err, str_error_r(err, sbuf, sizeof(sbuf)),
			 perf_evsel__name(evsel));
}

struct perf_env *perf_evsel__env(struct evsel *evsel)
{
	if (evsel && evsel->evlist)
		return evsel->evlist->env;
	return NULL;
}

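/*
 * Walk every (cpu, thread) file descriptor of the evsel and record its sample
 * id in the evlist, so samples read later can be matched back to this evsel.
 */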
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
{
	int cpu, thread;

	for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) {
		for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
		     thread++) {
			int fd = FD(evsel, cpu, thread);

			if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
	struct perf_cpu_map *cpus = evsel->core.cpus;
	struct perf_thread_map *threads = evsel->core.threads;

	if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
		return -ENOMEM;

	return store_evsel_ids(evsel, evlist);
}