session.c 66.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2
#include <errno.h>
3
#include <inttypes.h>
4
#include <linux/err.h>
5
#include <linux/kernel.h>
6
#include <linux/zalloc.h>
7
#include <traceevent/event-parse.h>
8
#include <api/fs/fs.h>
9

10
#include <byteswap.h>
11 12
#include <unistd.h>
#include <sys/types.h>
13
#include <sys/mman.h>
14
#include <perf/cpumap.h>
15

16 17
#include "evlist.h"
#include "evsel.h"
18
#include "memswap.h"
19
#include "map.h"
20
#include "symbol.h"
21
#include "session.h"
22
#include "tool.h"
23
#include "sort.h"
24
#include "cpumap.h"
25
#include "perf_regs.h"
26
#include "asm/bug.h"
27
#include "auxtrace.h"
28
#include "thread.h"
29
#include "thread-stack.h"
30
#include "sample-raw.h"
31
#include "stat.h"
32
#include "util.h"
33
#include "arch/common.h"
34

35 36 37 38 39 40 41
#ifdef HAVE_ZSTD_SUPPORT
/*
 * Decompress the payload of a compressed record into a newly mmap()ed
 * struct decomp and append that buffer to the session's decomp list.
 *
 * If the previous decomp buffer still holds bytes at or after its ->head
 * (decomp_last->size - decomp_last->head of them), they are copied to the
 * start of the new buffer and the freshly decompressed data is written
 * right behind them.
 *
 * @session:     session owning the decomp list and the zstd state
 * @event:       the compressed record; the payload starts right after
 *               struct perf_record_compressed
 * @file_offset: stored in the new buffer's ->file_pos
 *
 * Returns 0 on success, -1 when the buffer cannot be mapped or when
 * zstd_decompress_stream() produces no output.
 */
static int perf_session__process_compressed_event(struct perf_session *session,
						  union perf_event *event, u64 file_offset)
{
	void *src;
	size_t decomp_size, src_size;
	u64 decomp_last_rem = 0;
	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
	struct decomp *decomp, *decomp_last = session->decomp_last;

	if (decomp_last) {
		decomp_last_rem = decomp_last->size - decomp_last->head;
		decomp_len += decomp_last_rem;
	}

	mmap_len = sizeof(struct decomp) + decomp_len;
	/*
	 * An anonymous private mapping is zero-filled, so fields that are not
	 * assigned below (->size when nothing is carried over, ->next) start
	 * out as 0/NULL.
	 */
	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	if (decomp == MAP_FAILED) {
		pr_err("Couldn't allocate memory for decompression\n");
		return -1;
	}

	decomp->file_pos = file_offset;
	decomp->mmap_len = mmap_len;
	decomp->head = 0;

	/* Carry over what is left in the previous buffer. */
	if (decomp_last_rem) {
		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
		decomp->size = decomp_last_rem;
	}

	src = (void *)event + sizeof(struct perf_record_compressed);
	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);

	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
	if (!decomp_size) {
		munmap(decomp, mmap_len);
		pr_err("Couldn't decompress data\n");
		return -1;
	}

	decomp->size += decomp_size;

	/* Append to the singly linked list, creating it on first use. */
	if (session->decomp == NULL) {
		session->decomp = decomp;
		session->decomp_last = decomp;
	} else {
		session->decomp_last->next = decomp;
		session->decomp_last = decomp;
	}

	/* Both values are size_t: use the 'z' length modifier, not 'l'. */
	pr_debug("decomp (B): %zu to %zu\n", src_size, decomp_size);

	return 0;
}
#else /* !HAVE_ZSTD_SUPPORT */
#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
#endif

96 97 98 99
/*
 * Forward declaration: needed because ordered_events__deliver_event() below
 * calls this function before its definition is seen.
 */
static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset);
100

101
static int perf_session__open(struct perf_session *session)
102
{
103
	struct perf_data *data = session->data;
104

105
	if (perf_session__read_header(session) < 0) {
106
		pr_err("incompatible file format (rerun with -v to learn more)\n");
107
		return -1;
108 109
	}

110
	if (perf_data__is_pipe(data))
111 112
		return 0;

113 114 115
	if (perf_header__has_feat(&session->header, HEADER_STAT))
		return 0;

116
	if (!perf_evlist__valid_sample_type(session->evlist)) {
117
		pr_err("non matching sample_type\n");
118
		return -1;
119 120
	}

121
	if (!perf_evlist__valid_sample_id_all(session->evlist)) {
122
		pr_err("non matching sample_id_all\n");
123
		return -1;
124 125
	}

126
	if (!perf_evlist__valid_read_format(session->evlist)) {
127
		pr_err("non matching read_format\n");
128
		return -1;
129 130
	}

131 132 133
	return 0;
}

134
void perf_session__set_id_hdr_size(struct perf_session *session)
135
{
136 137 138
	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);

	machines__set_id_hdr_size(&session->machines, id_hdr_size);
139 140
}

141
int perf_session__create_kernel_maps(struct perf_session *session)
142
{
143
	int ret = machine__create_kernel_maps(&session->machines.host);
144 145

	if (ret >= 0)
146
		ret = machines__create_guest_kernel_maps(&session->machines);
147 148 149
	return ret;
}

150
static void perf_session__destroy_kernel_maps(struct perf_session *session)
151
{
152
	machines__destroy_kernel_maps(&session->machines);
153 154
}

155 156
static bool perf_session__has_comm_exec(struct perf_session *session)
{
157
	struct evsel *evsel;
158

159
	evlist__for_each_entry(session->evlist, evsel) {
160
		if (evsel->core.attr.comm_exec)
161 162 163 164 165 166 167 168 169 170 171 172 173
			return true;
	}

	return false;
}

/* Tell the machines whether any event of this session uses comm_exec. */
static void perf_session__set_comm_exec(struct perf_session *session)
{
	machines__set_comm_exec(&session->machines,
				perf_session__has_comm_exec(session));
}

174
static int ordered_events__deliver_event(struct ordered_events *oe,
175
					 struct ordered_event *event)
176
{
177 178 179
	struct perf_session *session = container_of(oe, struct perf_session,
						    ordered_events);

180
	return perf_session__deliver_event(session, event->event,
181
					   session->tool, event->file_offset);
182 183
}

184
struct perf_session *perf_session__new(struct perf_data *data,
185
				       bool repipe, struct perf_tool *tool)
186
{
187
	struct perf_session *session = zalloc(sizeof(*session));
188

189
	if (!session)
190 191
		goto out;

192
	session->repipe = repipe;
193
	session->tool   = tool;
194
	INIT_LIST_HEAD(&session->auxtrace_index);
195
	machines__init(&session->machines);
196 197
	ordered_events__init(&session->ordered_events,
			     ordered_events__deliver_event, NULL);
198

199
	perf_env__init(&session->header.env);
200 201
	if (data) {
		if (perf_data__open(data))
202
			goto out_delete;
203

204
		session->data = data;
205

206
		if (perf_data__is_read(data)) {
207
			if (perf_session__open(session) < 0)
208
				goto out_delete;
209

210 211 212 213
			/*
			 * set session attributes that are present in perf.data
			 * but not in pipe-mode.
			 */
214
			if (!data->is_pipe) {
215 216 217
				perf_session__set_id_hdr_size(session);
				perf_session__set_comm_exec(session);
			}
218 219

			perf_evlist__init_trace_event_sample_raw(session->evlist);
220 221 222 223

			/* Open the directory data. */
			if (data->is_dir && perf_data__open_dir(data))
				goto out_delete;
224
		}
225 226
	} else  {
		session->machines.host.env = &perf_env;
227 228
	}

229 230 231
	session->machines.host.single_address_space =
		perf_env__single_address_space(session->machines.host.env);

232
	if (!data || perf_data__is_write(data)) {
233 234
		/*
		 * In O_RDONLY mode this will be performed when reading the
235
		 * kernel MMAP event, in perf_event__process_mmap().
236
		 */
237
		if (perf_session__create_kernel_maps(session) < 0)
238
			pr_warning("Cannot read kernel map\n");
239
	}
240

241 242 243 244
	/*
	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
	 * processed, so perf_evlist__sample_id_all is not meaningful here.
	 */
245
	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
246
	    tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
247
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
248
		tool->ordered_events = false;
249
	}
250

251
	return session;
252 253

 out_delete:
254
	perf_session__delete(session);
255
 out:
256
	return NULL;
257 258
}

259 260
static void perf_session__delete_threads(struct perf_session *session)
{
261
	machine__delete_threads(&session->machines.host);
262 263
}

264 265 266
static void perf_session__release_decomp_events(struct perf_session *session)
{
	struct decomp *next, *decomp;
267
	size_t mmap_len;
268 269 270 271 272 273
	next = session->decomp;
	do {
		decomp = next;
		if (decomp == NULL)
			break;
		next = decomp->next;
274 275
		mmap_len = decomp->mmap_len;
		munmap(decomp, mmap_len);
276 277 278
	} while (1);
}

279
void perf_session__delete(struct perf_session *session)
280
{
281 282
	if (session == NULL)
		return;
283
	auxtrace__free(session);
284
	auxtrace_index__free(&session->auxtrace_index);
285 286
	perf_session__destroy_kernel_maps(session);
	perf_session__delete_threads(session);
287
	perf_session__release_decomp_events(session);
288
	perf_env__exit(&session->header.env);
289
	machines__exit(&session->machines);
290 291
	if (session->data)
		perf_data__close(session->data);
292
	free(session);
293
}
294

295
static int process_event_synth_tracing_data_stub(struct perf_session *session
296 297
						 __maybe_unused,
						 union perf_event *event
298
						 __maybe_unused)
299 300 301 302 303
{
	dump_printf(": unhandled!\n");
	return 0;
}

304 305
/* Default tool->attr handler: emit ": unhandled!" and succeed. */
static int
process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
			      union perf_event *event __maybe_unused,
			      struct evlist **pevlist __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

313 314
/*
 * Default tool->event_update handler: when dump_trace is set, print the
 * event to stdout; then emit ": unhandled!" and succeed.
 */
static int
process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
				      union perf_event *event __maybe_unused,
				      struct evlist **pevlist __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_event_update(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}

325 326 327
/* Default handler for sample-style callbacks: emit ": unhandled!" and succeed. */
static int
process_event_sample_stub(struct perf_tool *tool __maybe_unused,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample __maybe_unused,
			  struct evsel *evsel __maybe_unused,
			  struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

335 336 337 338
/* Generic default event handler: emit ": unhandled!" and succeed. */
static int
process_event_stub(struct perf_tool *tool __maybe_unused,
		   union perf_event *event __maybe_unused,
		   struct perf_sample *sample __maybe_unused,
		   struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

344 345
/*
 * tool->finished_round handler installed by perf_tool__fill_defaults() when
 * the tool does not use ordered events: emit ": unhandled!" and succeed.
 */
static int
process_finished_round_stub(struct perf_tool *tool __maybe_unused,
			    union perf_event *event __maybe_unused,
			    struct ordered_events *oe __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

352
/*
 * Forward declaration: perf_tool__fill_defaults() refers to this function
 * before its definition further down in this file.
 */
static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event,
				  struct ordered_events *oe);
355

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
/*
 * Discard @n bytes from @fd by reading them into a scratch buffer.
 *
 * Returns 0 once @n bytes have been consumed (also when @n <= 0) and -1 on
 * failure.  A read() interrupted by a signal (EINTR) is retried.  Reaching
 * end-of-file before @n bytes were consumed is reported as failure rather
 * than success; errno is not set in that case.
 */
static int skipn(int fd, off_t n)
{
	char buf[4096];

	while (n > 0) {
		size_t chunk = n < (off_t)sizeof(buf) ? (size_t)n : sizeof(buf);
		ssize_t ret = read(fd, buf, chunk);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		/* Premature EOF: the requested bytes are not there. */
		if (ret == 0)
			return -1;

		n -= ret;
	}

	return 0;
}

371 372
/*
 * Default tool->auxtrace handler: emit ": unhandled!".  When the session
 * data is a pipe, event->auxtrace.size bytes are additionally read from the
 * data fd and discarded.  Returns event->auxtrace.size in both cases.
 */
static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
				       union perf_event *event)
{
	dump_printf(": unhandled!\n");

	if (!perf_data__is_pipe(session->data))
		return event->auxtrace.size;

	skipn(perf_data__fd(session->data), event->auxtrace.size);

	return event->auxtrace.size;
}

380 381
/* Default (session, event) handler: emit ": unhandled!" and succeed. */
static int
process_event_op2_stub(struct perf_session *session __maybe_unused,
		       union perf_event *event __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

387 388

static
389 390
int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
391
{
392 393 394
	if (dump_trace)
		perf_event__fprintf_thread_map(event, stdout);

395 396 397 398
	dump_printf(": unhandled!\n");
	return 0;
}

399
static
400 401
int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
			       union perf_event *event __maybe_unused)
402
{
403 404 405
	if (dump_trace)
		perf_event__fprintf_cpu_map(event, stdout);

406 407 408 409
	dump_printf(": unhandled!\n");
	return 0;
}

410
static
411 412
int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
				   union perf_event *event __maybe_unused)
413
{
414 415 416
	if (dump_trace)
		perf_event__fprintf_stat_config(event, stdout);

417 418 419 420
	dump_printf(": unhandled!\n");
	return 0;
}

421 422
/*
 * Default tool->stat handler: when dump_trace is set, print the stat event
 * to stdout; then emit ": unhandled!" and succeed.
 */
static int
process_stat_stub(struct perf_session *perf_session __maybe_unused,
		  union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}

431 432
/*
 * Default tool->stat_round handler: when dump_trace is set, print the stat
 * round event to stdout; then emit ": unhandled!" and succeed.
 */
static int
process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
			union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat_round(event, stdout);

	dump_printf(": unhandled!\n");

	return 0;
}

441 442 443 444 445 446 447 448
/*
 * Stand-in for perf_session__process_compressed_event() when the build has
 * no HAVE_ZSTD_SUPPORT: emit ": unhandled!" and succeed.
 */
static int
perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
					    union perf_event *event __maybe_unused,
					    u64 file_offset __maybe_unused)
{
	dump_printf(": unhandled!\n");

	return 0;
}

449
void perf_tool__fill_defaults(struct perf_tool *tool)
450
{
451 452 453 454
	if (tool->sample == NULL)
		tool->sample = process_event_sample_stub;
	if (tool->mmap == NULL)
		tool->mmap = process_event_stub;
455 456
	if (tool->mmap2 == NULL)
		tool->mmap2 = process_event_stub;
457 458
	if (tool->comm == NULL)
		tool->comm = process_event_stub;
459 460
	if (tool->namespaces == NULL)
		tool->namespaces = process_event_stub;
461 462 463 464 465 466
	if (tool->fork == NULL)
		tool->fork = process_event_stub;
	if (tool->exit == NULL)
		tool->exit = process_event_stub;
	if (tool->lost == NULL)
		tool->lost = perf_event__process_lost;
467 468
	if (tool->lost_samples == NULL)
		tool->lost_samples = perf_event__process_lost_samples;
469 470
	if (tool->aux == NULL)
		tool->aux = perf_event__process_aux;
471 472
	if (tool->itrace_start == NULL)
		tool->itrace_start = perf_event__process_itrace_start;
473 474
	if (tool->context_switch == NULL)
		tool->context_switch = perf_event__process_switch;
475 476
	if (tool->ksymbol == NULL)
		tool->ksymbol = perf_event__process_ksymbol;
477 478
	if (tool->bpf == NULL)
		tool->bpf = perf_event__process_bpf;
479 480 481 482 483 484 485 486
	if (tool->read == NULL)
		tool->read = process_event_sample_stub;
	if (tool->throttle == NULL)
		tool->throttle = process_event_stub;
	if (tool->unthrottle == NULL)
		tool->unthrottle = process_event_stub;
	if (tool->attr == NULL)
		tool->attr = process_event_synth_attr_stub;
487 488
	if (tool->event_update == NULL)
		tool->event_update = process_event_synth_event_update_stub;
489 490 491
	if (tool->tracing_data == NULL)
		tool->tracing_data = process_event_synth_tracing_data_stub;
	if (tool->build_id == NULL)
492
		tool->build_id = process_event_op2_stub;
493
	if (tool->finished_round == NULL) {
494
		if (tool->ordered_events)
495
			tool->finished_round = process_finished_round;
496
		else
497
			tool->finished_round = process_finished_round_stub;
498
	}
A
Adrian Hunter 已提交
499
	if (tool->id_index == NULL)
500
		tool->id_index = process_event_op2_stub;
501
	if (tool->auxtrace_info == NULL)
502
		tool->auxtrace_info = process_event_op2_stub;
503 504
	if (tool->auxtrace == NULL)
		tool->auxtrace = process_event_auxtrace_stub;
505
	if (tool->auxtrace_error == NULL)
506
		tool->auxtrace_error = process_event_op2_stub;
507 508
	if (tool->thread_map == NULL)
		tool->thread_map = process_event_thread_map_stub;
509 510
	if (tool->cpu_map == NULL)
		tool->cpu_map = process_event_cpu_map_stub;
511 512
	if (tool->stat_config == NULL)
		tool->stat_config = process_event_stat_config_stub;
J
Jiri Olsa 已提交
513 514
	if (tool->stat == NULL)
		tool->stat = process_stat_stub;
515 516
	if (tool->stat_round == NULL)
		tool->stat_round = process_stat_round_stub;
517 518
	if (tool->time_conv == NULL)
		tool->time_conv = process_event_op2_stub;
519 520
	if (tool->feature == NULL)
		tool->feature = process_event_op2_stub;
521
	if (tool->compressed == NULL)
522
		tool->compressed = perf_session__process_compressed_event;
523
}
524

525 526 527 528 529 530 531 532 533 534
/*
 * Byte-swap, as 64-bit words, everything from @data up to the end of @event
 * (event->header.size bytes from its start).  The span must be a multiple of
 * sizeof(u64).
 */
static void swap_sample_id_all(union perf_event *event, void *data)
{
	int size = ((void *) event + event->header.size) - data;

	BUG_ON(size % sizeof(u64));
	mem_bswap_64(data, size);
}

static void perf_event__all64_swap(union perf_event *event,
535
				   bool sample_id_all __maybe_unused)
536
{
537 538
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
539 540
}

541
/*
 * Byte-swap a comm event.  With @sample_id_all, the data that follows the
 * u64-aligned, NUL-terminated comm string is swapped as well.
 */
static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
{
	void *tail;

	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);

	if (!sample_id_all)
		return;

	tail = &event->comm.comm;
	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
	swap_sample_id_all(event, tail);
}

554 555
/*
 * Byte-swap an mmap event.  With @sample_id_all, the data that follows the
 * u64-aligned, NUL-terminated filename is swapped as well.
 */
static void perf_event__mmap_swap(union perf_event *event,
				  bool sample_id_all)
{
	void *tail;

	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);

	if (!sample_id_all)
		return;

	tail = &event->mmap.filename;
	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
	swap_sample_id_all(event, tail);
}

571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
/*
 * Byte-swap an mmap2 event.  With @sample_id_all, the data that follows the
 * u64-aligned, NUL-terminated filename is swapped as well.
 */
static void perf_event__mmap2_swap(union perf_event *event,
				   bool sample_id_all)
{
	void *tail;

	event->mmap2.pid   = bswap_32(event->mmap2.pid);
	event->mmap2.tid   = bswap_32(event->mmap2.tid);
	event->mmap2.start = bswap_64(event->mmap2.start);
	event->mmap2.len   = bswap_64(event->mmap2.len);
	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
	event->mmap2.maj   = bswap_32(event->mmap2.maj);
	event->mmap2.min   = bswap_32(event->mmap2.min);
	event->mmap2.ino   = bswap_64(event->mmap2.ino);

	if (!sample_id_all)
		return;

	tail = &event->mmap2.filename;
	tail += PERF_ALIGN(strlen(tail) + 1, sizeof(u64));
	swap_sample_id_all(event, tail);
}
590
/*
 * Byte-swap a fork/exit event (both are accessed through event->fork).
 * With @sample_id_all, the data following the fixed part is swapped too.
 */
static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->fork + 1);
}

602
/*
 * Byte-swap a read event.  With @sample_id_all, the data following the
 * fixed part is swapped too.
 */
static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->read + 1);
}

615 616 617 618 619 620 621 622 623 624
/*
 * Byte-swap an aux event.  With @sample_id_all, the data following the
 * fixed part is swapped too.
 */
static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
{
	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
	event->aux.aux_size   = bswap_64(event->aux.aux_size);
	event->aux.flags      = bswap_64(event->aux.flags);

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->aux + 1);
}

625 626 627 628 629 630 631 632 633 634
/*
 * Byte-swap an itrace_start event.  With @sample_id_all, the data following
 * the fixed part is swapped too.
 */
static void perf_event__itrace_start_swap(union perf_event *event,
					  bool sample_id_all)
{
	event->itrace_start.pid = bswap_32(event->itrace_start.pid);
	event->itrace_start.tid = bswap_32(event->itrace_start.tid);

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->itrace_start + 1);
}

635 636 637 638 639 640 641 642 643 644 645 646 647
/*
 * Byte-swap a context switch event.  The next_prev_pid/tid fields are only
 * swapped for PERF_RECORD_SWITCH_CPU_WIDE.  With @sample_id_all, the data
 * following event->context_switch is swapped too.
 */
static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
{
	bool cpu_wide = event->header.type == PERF_RECORD_SWITCH_CPU_WIDE;

	if (cpu_wide) {
		event->context_switch.next_prev_pid =
				bswap_32(event->context_switch.next_prev_pid);
		event->context_switch.next_prev_tid =
				bswap_32(event->context_switch.next_prev_tid);
	}

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->context_switch + 1);
}

648 649 650 651 652 653 654 655 656 657 658
/*
 * Byte-swap a throttle/unthrottle event.  With @sample_id_all, the data
 * following the fixed part is swapped too.
 */
static void perf_event__throttle_swap(union perf_event *event,
				      bool sample_id_all)
{
	event->throttle.time	  = bswap_64(event->throttle.time);
	event->throttle.id	  = bswap_64(event->throttle.id);
	event->throttle.stream_id = bswap_64(event->throttle.stream_id);

	if (!sample_id_all)
		return;

	swap_sample_id_all(event, &event->throttle + 1);
}

659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
/*
 * Byte-swap a namespaces event: pid, tid, the entry count and then each
 * link_info entry.  With @sample_id_all, the data right after the last
 * link_info entry is swapped too.
 */
static void perf_event__namespaces_swap(union perf_event *event,
					bool sample_id_all)
{
	u64 i, nr;

	event->namespaces.pid = bswap_32(event->namespaces.pid);
	event->namespaces.tid = bswap_32(event->namespaces.tid);

	/* The count must be in native order before it can drive the loop. */
	nr = bswap_64(event->namespaces.nr_namespaces);
	event->namespaces.nr_namespaces = nr;

	for (i = 0; i < nr; i++) {
		struct perf_ns_link_info *ns = &event->namespaces.link_info[i];

		ns->dev = bswap_64(ns->dev);
		ns->ino = bswap_64(ns->ino);
	}

	if (!sample_id_all)
		return;

	/* i now indexes one past the last entry. */
	swap_sample_id_all(event, &event->namespaces.link_info[i]);
}

679 680 681 682 683 684 685 686 687 688
/* Return @b with the order of its eight bits reversed. */
static u8 revbyte(u8 b)
{
	unsigned int v = b;

	/* Swap adjacent bits, then bit pairs, then the two nibbles. */
	v = ((v >> 1) & 0x55) | ((v & 0x55) << 1);
	v = ((v >> 2) & 0x33) | ((v & 0x33) << 2);
	v = (v >> 4) | ((v & 0x0f) << 4);

	return (u8) v;
}

/*
 * XXX this is a hack in an attempt to carry the flags bitfield
 * through endian village. The ABI says:
 *
 * Bit-fields are allocated from right to left (least to most significant)
 * on little-endian implementations and from left to right (most to least
 * significant) on big-endian implementations.
 *
 * The above seems to be byte specific, so we need to reverse each
 * byte of the bitfield. 'Internet' also says this might be implementation
 * specific and we probably need a proper fix and carry the perf_event_attr
 * bitfield flags in a separate data file FEAT_ section. Though this seems
 * to work for now.
 */
static void swap_bitfield(u8 *p, unsigned len)
{
	unsigned idx;

	/* Reverse the bits of each of the @len bytes at @p, in place. */
	for (idx = 0; idx < len; idx++)
		p[idx] = revbyte(p[idx]);
}

711 712 713 714 715
/*
 * Byte-swap a perf_event_attr in place.  Exported for swapping attributes
 * in the file header.
 *
 * ->type and ->size are swapped unconditionally.  Every other field is only
 * swapped when the (already swapped) attr->size says the field starts inside
 * the attr.
 */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);

/* True if attr->size reaches past the start of field f plus n * sizeof(f). */
#define bswap_safe(f, n)					\
	(attr->size > (offsetof(struct perf_event_attr, f) +	\
		       sizeof(attr->f) * (n)))
#define bswap_field(f, sz)			\
do {						\
	if (bswap_safe(f, 0))			\
		attr->f = bswap_##sz(attr->f);	\
} while(0)
#define bswap_field_16(f) bswap_field(f, 16)
#define bswap_field_32(f) bswap_field(f, 32)
#define bswap_field_64(f) bswap_field(f, 64)

	bswap_field_64(config);
	bswap_field_64(sample_period);
	bswap_field_64(sample_type);
	bswap_field_64(read_format);
	bswap_field_32(wakeup_events);
	bswap_field_32(bp_type);
	bswap_field_64(bp_addr);
	bswap_field_64(bp_len);
	bswap_field_64(branch_sample_type);
	bswap_field_64(sample_regs_user);
	bswap_field_32(sample_stack_user);
	bswap_field_32(aux_watermark);
	bswap_field_16(sample_max_stack);

	/*
	 * After read_format are bitfields. Check read_format because
	 * we are unable to use offsetof on bitfield.
	 */
	if (bswap_safe(read_format, 1))
		swap_bitfield((u8 *) (&attr->read_format + 1),
			      sizeof(u64));

/* Drop every helper macro so none leaks into the rest of the file. */
#undef bswap_field_64
#undef bswap_field_32
#undef bswap_field_16
#undef bswap_field
#undef bswap_safe
}

756
static void perf_event__hdr_attr_swap(union perf_event *event,
757
				      bool sample_id_all __maybe_unused)
758 759 760
{
	size_t size;

761
	perf_event__attr_swap(&event->attr.attr);
762

763 764 765
	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
766 767
}

768 769 770 771 772 773 774
/* Byte-swap the type and id fields of an event_update event. */
static void perf_event__event_update_swap(union perf_event *event,
					  bool sample_id_all __maybe_unused)
{
	event->event_update.id   = bswap_64(event->event_update.id);
	event->event_update.type = bswap_64(event->event_update.type);
}

775
static void perf_event__event_type_swap(union perf_event *event,
776
					bool sample_id_all __maybe_unused)
777
{
778 779
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
780 781
}

782
static void perf_event__tracing_data_swap(union perf_event *event,
783
					  bool sample_id_all __maybe_unused)
784
{
785
	event->tracing_data.size = bswap_32(event->tracing_data.size);
786 787
}

788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
/*
 * Byte-swap an auxtrace_info event: the type field, then everything from
 * ->priv to the end of the event as 64-bit words.
 */
static void perf_event__auxtrace_info_swap(union perf_event *event,
					   bool sample_id_all __maybe_unused)
{
	size_t priv_bytes;

	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);

	/* Bytes between the start of ->priv and the end of the event. */
	priv_bytes = event->header.size;
	priv_bytes -= (void *)&event->auxtrace_info.priv - (void *)event;

	mem_bswap_64(event->auxtrace_info.priv, priv_bytes);
}

/* Byte-swap the fixed fields of an auxtrace event. */
static void perf_event__auxtrace_swap(union perf_event *event,
				      bool sample_id_all __maybe_unused)
{
	/* 64-bit fields */
	event->auxtrace.size      = bswap_64(event->auxtrace.size);
	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
	event->auxtrace.reference = bswap_64(event->auxtrace.reference);

	/* 32-bit fields */
	event->auxtrace.idx = bswap_32(event->auxtrace.idx);
	event->auxtrace.tid = bswap_32(event->auxtrace.tid);
	event->auxtrace.cpu = bswap_32(event->auxtrace.cpu);
}

811 812 813 814 815 816 817 818
/*
 * Byte-swap an auxtrace_error event.  The time field is only swapped when
 * the (already swapped) fmt field is non-zero.
 */
static void perf_event__auxtrace_error_swap(union perf_event *event,
					    bool sample_id_all __maybe_unused)
{
	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
	event->auxtrace_error.fmt  = bswap_32(event->auxtrace_error.fmt);
	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);

	if (!event->auxtrace_error.fmt)
		return;

	event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
}

825 826 827 828 829 830 831 832 833 834 835
/* Byte-swap a thread_map event: the entry count, then each entry's pid. */
static void perf_event__thread_map_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	unsigned idx;

	/* Swap the count first so it can bound the loop below. */
	event->thread_map.nr = bswap_64(event->thread_map.nr);

	for (idx = 0; idx < event->thread_map.nr; idx++) {
		event->thread_map.entries[idx].pid =
			bswap_64(event->thread_map.entries[idx].pid);
	}
}

836 837 838
/*
 * Byte-swap a cpu_map event in place.
 *
 * The map type is swapped first and selects the payload layout:
 *  - PERF_CPU_MAP__CPUS: a count followed by that many 16-bit cpu numbers;
 *  - PERF_CPU_MAP__MASK: a count and a long size followed by the mask words.
 * Any other type is left untouched.
 */
static void perf_event__cpu_map_swap(union perf_event *event,
				     bool sample_id_all __maybe_unused)
{
	struct perf_record_cpu_map_data *data = &event->cpu_map.data;
	struct cpu_map_entries *cpus;
	struct perf_record_record_cpu_map *mask;
	unsigned i;

	/*
	 * NOTE(review): ->type is assumed to be a 16-bit field (as declared in
	 * libperf's event.h); swapping it as a 64-bit value and storing the
	 * result back would always truncate to 0.  Confirm against the header.
	 */
	data->type = bswap_16(data->type);

	switch (data->type) {
	case PERF_CPU_MAP__CPUS:
		cpus = (struct cpu_map_entries *)data->data;

		cpus->nr = bswap_16(cpus->nr);

		for (i = 0; i < cpus->nr; i++)
			cpus->cpu[i] = bswap_16(cpus->cpu[i]);
		break;
	case PERF_CPU_MAP__MASK:
		mask = (struct perf_record_record_cpu_map *)data->data;

		mask->nr = bswap_16(mask->nr);
		mask->long_size = bswap_16(mask->long_size);

		/*
		 * mem_bswap_32()/mem_bswap_64() are called with byte counts
		 * elsewhere in this file, so convert the word count.
		 * NOTE(review): assumes ->nr counts mask words of ->long_size
		 * bytes each - confirm against the producer of this record.
		 */
		switch (mask->long_size) {
		case 4:
			mem_bswap_32(&mask->mask, mask->nr * sizeof(u32));
			break;
		case 8:
			mem_bswap_64(&mask->mask, mask->nr * sizeof(u64));
			break;
		default:
			pr_err("cpu_map swap: unsupported long size\n");
			break;
		}
		break;
	default:
		break;
	}
}

872 873 874 875 876 877 878 879 880 881
/*
 * Byte-swap a stat_config event: the nr field and the nr data entries that
 * follow it, all as 64-bit words.
 */
static void perf_event__stat_config_swap(union perf_event *event,
					 bool sample_id_all __maybe_unused)
{
	u64 size;

	/*
	 * ->nr has not been swapped yet at this point, so convert it before
	 * using it to size the region; mem_bswap_64() below then swaps the
	 * stored field itself together with the entries.
	 */
	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
	size += sizeof(event->stat_config.nr); /* nr item itself */
	mem_bswap_64(&event->stat_config.nr, size);
}

J
Jiri Olsa 已提交
882 883 884 885 886 887 888 889 890 891 892
/* Byte-swap every field of a stat event. */
static void perf_event__stat_swap(union perf_event *event,
				  bool sample_id_all __maybe_unused)
{
	/* 32-bit fields */
	event->stat.thread = bswap_32(event->stat.thread);
	event->stat.cpu    = bswap_32(event->stat.cpu);

	/* 64-bit fields */
	event->stat.id  = bswap_64(event->stat.id);
	event->stat.val = bswap_64(event->stat.val);
	event->stat.ena = bswap_64(event->stat.ena);
	event->stat.run = bswap_64(event->stat.run);
}

893 894 895 896 897 898 899
/* Byte-swap the type and time fields of a stat_round event. */
static void perf_event__stat_round_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	event->stat_round.time = bswap_64(event->stat_round.time);
	event->stat_round.type = bswap_64(event->stat_round.type);
}

900 901
/*
 * Signature shared by all per-record byte-swap helpers above: swap @event in
 * place; @sample_id_all tells helpers that care whether trailing sample-id
 * data must be swapped as well.
 */
typedef void (*perf_event__swap_op)(union perf_event *event,
				    bool sample_id_all);
902

903 904
static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
905
	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
906 907 908 909 910
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
911 912
	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
913
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
914
	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
915
	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
916
	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
917 918
	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
919
	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
920
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
921 922 923
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
A
Adrian Hunter 已提交
924
	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
925 926
	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
927
	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
928
	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
929
	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
930
	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
J
Jiri Olsa 已提交
931
	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
932
	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
933
	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
934
	[PERF_RECORD_TIME_CONV]		  = perf_event__all64_swap,
935
	[PERF_RECORD_HEADER_MAX]	  = NULL,
936 937
};

938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
/*
 * When perf record finishes a pass over all buffers, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush all
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every events below timestamp 7
 *      etc...
 */
977
/*
 * Flush the ordered-events queue at the end of a round.
 *
 * @tool and @event are unused.  When dump_trace is set a newline is written
 * to stdout first.  Returns whatever ordered_events__flush() returns for
 * OE_FLUSH__ROUND.
 */
static int process_finished_round(struct perf_tool *tool __maybe_unused,
				  union perf_event *event __maybe_unused,
				  struct ordered_events *oe)
{
	int ret;

	if (dump_trace)
		fprintf(stdout, "\n");

	ret = ordered_events__flush(oe, OE_FLUSH__ROUND);
	return ret;
}

986
/*
 * Queue @event, tagged with @timestamp and @file_offset, on the session's
 * ordered-events queue.  Returns the result of ordered_events__queue().
 */
int perf_session__queue_event(struct perf_session *s, union perf_event *event,
			      u64 timestamp, u64 file_offset)
{
	struct ordered_events *queue = &s->ordered_events;

	return ordered_events__queue(queue, event, timestamp, file_offset);
}
991

K
Kan Liang 已提交
992
static void callchain__lbr_callstack_printf(struct perf_sample *sample)
993
{
K
Kan Liang 已提交
994 995 996
	struct ip_callchain *callchain = sample->callchain;
	struct branch_stack *lbr_stack = sample->branch_stack;
	u64 kernel_callchain_nr = callchain->nr;
997
	unsigned int i;
998

K
Kan Liang 已提交
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
	for (i = 0; i < kernel_callchain_nr; i++) {
		if (callchain->ips[i] == PERF_CONTEXT_USER)
			break;
	}

	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
		u64 total_nr;
		/*
		 * LBR callstack can only get user call chain,
		 * i is kernel call chain number,
		 * 1 is PERF_CONTEXT_USER.
		 *
		 * The user call chain is stored in LBR registers.
		 * LBR are pair registers. The caller is stored
		 * in "from" register, while the callee is stored
		 * in "to" register.
		 * For example, there is a call stack
		 * "A"->"B"->"C"->"D".
		 * The LBR registers will recorde like
		 * "C"->"D", "B"->"C", "A"->"B".
		 * So only the first "to" register and all "from"
		 * registers are needed to construct the whole stack.
		 */
		total_nr = i + 1 + lbr_stack->nr + 1;
		kernel_callchain_nr = i + 1;

		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);

		for (i = 0; i < kernel_callchain_nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       i, callchain->ips[i]);

		printf("..... %2d: %016" PRIx64 "\n",
		       (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
		for (i = 0; i < lbr_stack->nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
	}
}

1039
static void callchain__printf(struct evsel *evsel,
K
Kan Liang 已提交
1040 1041 1042 1043 1044
			      struct perf_sample *sample)
{
	unsigned int i;
	struct ip_callchain *callchain = sample->callchain;

1045
	if (perf_evsel__has_branch_callstack(evsel))
K
Kan Liang 已提交
1046 1047 1048
		callchain__lbr_callstack_printf(sample);

	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
1049

K
Kan Liang 已提交
1050
	for (i = 0; i < callchain->nr; i++)
1051
		printf("..... %2d: %016" PRIx64 "\n",
K
Kan Liang 已提交
1052
		       i, callchain->ips[i]);
1053 1054
}

1055
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
1056 1057 1058
{
	uint64_t i;

1059 1060 1061
	printf("%s: nr:%" PRIu64 "\n",
		!callstack ? "... branch stack" : "... branch callstack",
		sample->branch_stack->nr);
1062

1063 1064 1065
	for (i = 0; i < sample->branch_stack->nr; i++) {
		struct branch_entry *e = &sample->branch_stack->entries[i];

1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
		if (!callstack) {
			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
				i, e->from, e->to,
				(unsigned short)e->flags.cycles,
				e->flags.mispred ? "M" : " ",
				e->flags.predicted ? "P" : " ",
				e->flags.abort ? "A" : " ",
				e->flags.in_tx ? "T" : " ",
				(unsigned)e->flags.reserved);
		} else {
			printf("..... %2"PRIu64": %016" PRIx64 "\n",
				i, i > 0 ? e->from : e->to);
		}
1079
	}
1080 1081
}

1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093
/*
 * Print one line per register selected in @mask.
 *
 * @mask: bit N set means register id N is present in @regs.
 * @regs: register values, packed in ascending register-id order.
 *
 * The bits are tested directly on the u64 instead of reinterpreting &mask as
 * an 'unsigned long' bitmap: that cast breaks aliasing rules and walks the
 * two 32-bit halves in the wrong order on 32-bit big-endian hosts.
 */
static void regs_dump__printf(u64 mask, u64 *regs)
{
	unsigned rid, i = 0;

	for (rid = 0; rid < sizeof(mask) * 8; rid++) {
		u64 val;

		if (!(mask & (1ULL << rid)))
			continue;

		val = regs[i++];

		printf(".... %-5s 0x%" PRIx64 "\n",
		       perf_reg_name(rid), val);
	}
}

1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
/* Printable names for the register-dump ABI, indexed by PERF_SAMPLE_REGS_ABI_*. */
static const char *regs_abi[] = {
	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
};

/*
 * Return the printable name of @d's ABI, or "unknown" when the value is
 * greater than PERF_SAMPLE_REGS_ABI_64.
 */
static inline const char *regs_dump_abi(struct regs_dump *d)
{
	return d->abi > PERF_SAMPLE_REGS_ABI_64 ? "unknown" : regs_abi[d->abi];
}

/*
 * Print a register dump: a header line with @type, the register mask and the
 * ABI name, followed by one line per register.
 */
static void regs__printf(const char *type, struct regs_dump *regs)
{
	const char *abi_name = regs_dump_abi(regs);
	u64 mask = regs->mask;

	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n", type, mask, abi_name);

	regs_dump__printf(mask, regs->regs);
}

1120
static void regs_user__printf(struct perf_sample *sample)
1121 1122 1123
{
	struct regs_dump *user_regs = &sample->user_regs;

1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
	if (user_regs->regs)
		regs__printf("user", user_regs);
}

/* Print the sample's interrupt register dump, if one is attached. */
static void regs_intr__printf(struct perf_sample *sample)
{
	if (!sample->intr_regs.regs)
		return;

	regs__printf("intr", &sample->intr_regs);
}

/* Print the size and offset fields of a user stack dump. */
static void stack_user__printf(struct stack_dump *dump)
{
	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
	       dump->size, dump->offset);
}

1142
static void perf_evlist__print_tstamp(struct evlist *evlist,
1143
				       union perf_event *event,
1144
				       struct perf_sample *sample)
1145
{
1146
	u64 sample_type = __perf_evlist__combined_sample_type(evlist);
1147

1148
	if (event->header.type != PERF_RECORD_SAMPLE &&
1149
	    !perf_evlist__sample_id_all(evlist)) {
1150 1151 1152 1153
		fputs("-1 -1 ", stdout);
		return;
	}

1154
	if ((sample_type & PERF_SAMPLE_CPU))
1155 1156
		printf("%u ", sample->cpu);

1157
	if (sample_type & PERF_SAMPLE_TIME)
1158
		printf("%" PRIu64 " ", sample->time);
1159 1160
}

1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190
/*
 * Print the PERF_SAMPLE_READ payload of @sample according to @read_format:
 * optional enabled/running times, then either the single id/value pair or
 * every id/value pair of the group.
 */
static void sample_read__printf(struct perf_sample *sample, u64 read_format)
{
	u64 idx;

	printf("... sample_read:\n");

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("...... time enabled %016" PRIx64 "\n",
		       sample->read.time_enabled);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("...... time running %016" PRIx64 "\n",
		       sample->read.time_running);

	/* Single-counter layout. */
	if (!(read_format & PERF_FORMAT_GROUP)) {
		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
			sample->read.one.id, sample->read.one.value);
		return;
	}

	/* Group layout. */
	printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);

	for (idx = 0; idx < sample->read.group.nr; idx++) {
		struct sample_read_value *value = &sample->read.group.values[idx];

		printf("..... id %016" PRIx64
		       ", value %016" PRIx64 "\n",
		       value->id, value->value);
	}
}

1191
static void dump_event(struct evlist *evlist, union perf_event *event,
1192
		       u64 file_offset, struct perf_sample *sample)
1193 1194 1195 1196
{
	if (!dump_trace)
		return;

1197 1198
	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);
1199 1200

	trace_event(event);
1201 1202
	if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
		evlist->trace_event_sample_raw(evlist, event, sample);
1203 1204

	if (sample)
1205
		perf_evlist__print_tstamp(evlist, event, sample);
1206

1207
	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
1208
	       event->header.size, perf_event__name(event->header.type));
1209 1210
}

1211
static void dump_sample(struct evsel *evsel, union perf_event *event,
1212
			struct perf_sample *sample)
1213
{
1214 1215
	u64 sample_type;

1216 1217 1218
	if (!dump_trace)
		return;

1219
	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
1220
	       event->header.misc, sample->pid, sample->tid, sample->ip,
1221
	       sample->period, sample->addr);
1222

1223
	sample_type = evsel->core.attr.sample_type;
1224

1225
	if (evsel__has_callchain(evsel))
K
Kan Liang 已提交
1226
		callchain__printf(evsel, sample);
1227

1228 1229
	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
		branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));
1230 1231

	if (sample_type & PERF_SAMPLE_REGS_USER)
1232
		regs_user__printf(sample);
1233

1234 1235 1236
	if (sample_type & PERF_SAMPLE_REGS_INTR)
		regs_intr__printf(sample);

1237 1238
	if (sample_type & PERF_SAMPLE_STACK_USER)
		stack_user__printf(&sample->user_stack);
1239 1240 1241

	if (sample_type & PERF_SAMPLE_WEIGHT)
		printf("... weight: %" PRIu64 "\n", sample->weight);
1242 1243 1244

	if (sample_type & PERF_SAMPLE_DATA_SRC)
		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
1245

1246 1247 1248
	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);

1249 1250 1251
	if (sample_type & PERF_SAMPLE_TRANSACTION)
		printf("... transaction: %" PRIx64 "\n", sample->transaction);

1252
	if (sample_type & PERF_SAMPLE_READ)
1253
		sample_read__printf(sample, evsel->core.attr.read_format);
1254 1255
}

1256
static void dump_read(struct evsel *evsel, union perf_event *event)
J
Jiri Olsa 已提交
1257
{
1258
	struct perf_record_read *read_event = &event->read;
J
Jiri Olsa 已提交
1259 1260 1261 1262 1263
	u64 read_format;

	if (!dump_trace)
		return;

1264
	printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
1265
	       perf_evsel__name(evsel),
J
Jiri Olsa 已提交
1266 1267
	       event->read.value);

1268 1269 1270
	if (!evsel)
		return;

1271
	read_format = evsel->core.attr.read_format;
J
Jiri Olsa 已提交
1272 1273

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1274
		printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
J
Jiri Olsa 已提交
1275 1276

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1277
		printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
J
Jiri Olsa 已提交
1278 1279

	if (read_format & PERF_FORMAT_ID)
1280
		printf("... id           : %" PRI_lu64 "\n", read_event->id);
J
Jiri Olsa 已提交
1281 1282
}

1283
static struct machine *machines__find_for_cpumode(struct machines *machines,
1284 1285
					       union perf_event *event,
					       struct perf_sample *sample)
1286
{
1287
	struct machine *machine;
1288

1289
	if (perf_guest &&
1290 1291
	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
1292 1293
		u32 pid;

1294 1295
		if (event->header.type == PERF_RECORD_MMAP
		    || event->header.type == PERF_RECORD_MMAP2)
1296 1297
			pid = event->mmap.pid;
		else
1298
			pid = sample->pid;
1299

1300
		machine = machines__find(machines, pid);
1301
		if (!machine)
1302
			machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
1303
		return machine;
1304
	}
1305

1306
	return &machines->host;
1307 1308
}

1309
static int deliver_sample_value(struct evlist *evlist,
1310 1311 1312 1313 1314 1315
				struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct sample_read_value *v,
				struct machine *machine)
{
1316
	struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
1317 1318 1319 1320 1321 1322 1323 1324

	if (sid) {
		sample->id     = v->id;
		sample->period = v->value - sid->period;
		sid->period    = v->value;
	}

	if (!sid || sid->evsel == NULL) {
1325
		++evlist->stats.nr_unknown_id;
1326 1327 1328
		return 0;
	}

1329 1330 1331 1332 1333 1334 1335
	/*
	 * There's no reason to deliver sample
	 * for zero period, bail out.
	 */
	if (!sample->period)
		return 0;

1336 1337 1338
	return tool->sample(tool, event, sample, sid->evsel, machine);
}

1339
static int deliver_sample_group(struct evlist *evlist,
1340 1341 1342 1343 1344 1345 1346 1347 1348
				struct perf_tool *tool,
				union  perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	int ret = -EINVAL;
	u64 i;

	for (i = 0; i < sample->read.group.nr; i++) {
1349
		ret = deliver_sample_value(evlist, tool, event, sample,
1350 1351 1352 1353 1354 1355 1356 1357 1358 1359
					   &sample->read.group.values[i],
					   machine);
		if (ret)
			break;
	}

	return ret;
}

static int
1360
 perf_evlist__deliver_sample(struct evlist *evlist,
1361 1362 1363
			     struct perf_tool *tool,
			     union  perf_event *event,
			     struct perf_sample *sample,
1364
			     struct evsel *evsel,
1365 1366 1367
			     struct machine *machine)
{
	/* We know evsel != NULL. */
1368 1369
	u64 sample_type = evsel->core.attr.sample_type;
	u64 read_format = evsel->core.attr.read_format;
1370

1371
	/* Standard sample delivery. */
1372 1373 1374 1375 1376
	if (!(sample_type & PERF_SAMPLE_READ))
		return tool->sample(tool, event, sample, evsel, machine);

	/* For PERF_SAMPLE_READ we have either single or group mode. */
	if (read_format & PERF_FORMAT_GROUP)
1377
		return deliver_sample_group(evlist, tool, event, sample,
1378 1379
					    machine);
	else
1380
		return deliver_sample_value(evlist, tool, event, sample,
1381 1382 1383
					    &sample->read.one, machine);
}

1384
/*
 * Dispatch one record to the matching perf_tool callback.
 *
 * The event is dumped first (when dump_trace is on), then the evsel is looked
 * up from sample->id and the machine from the sample's cpumode.  A few record
 * types also update counters in evlist->stats before the callback runs.
 * Unknown record types are counted in nr_unknown_events and yield -1.
 */
static int machines__deliver_event(struct machines *machines,
				   struct evlist *evlist,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool, u64 file_offset)
{
	struct events_stats *stats = &evlist->stats;
	struct machine *machine;
	struct evsel *evsel;

	dump_event(evlist, event, file_offset, sample);

	evsel = perf_evlist__id2evsel(evlist, sample->id);
	machine = machines__find_for_cpumode(machines, event, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE: {
		if (!evsel) {
			++stats->nr_unknown_id;
			return 0;
		}
		dump_sample(evsel, event, sample);
		if (!machine) {
			++stats->nr_unprocessable_samples;
			return 0;
		}
		return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
	}

	case PERF_RECORD_MMAP:
		return tool->mmap(tool, event, sample, machine);

	case PERF_RECORD_MMAP2: {
		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
			++stats->nr_proc_map_timeout;
		return tool->mmap2(tool, event, sample, machine);
	}

	case PERF_RECORD_COMM:
		return tool->comm(tool, event, sample, machine);

	case PERF_RECORD_NAMESPACES:
		return tool->namespaces(tool, event, sample, machine);

	case PERF_RECORD_FORK:
		return tool->fork(tool, event, sample, machine);

	case PERF_RECORD_EXIT:
		return tool->exit(tool, event, sample, machine);

	case PERF_RECORD_LOST: {
		/* Only account when the default handler is in use. */
		if (tool->lost == perf_event__process_lost)
			stats->total_lost += event->lost.lost;
		return tool->lost(tool, event, sample, machine);
	}

	case PERF_RECORD_LOST_SAMPLES: {
		if (tool->lost_samples == perf_event__process_lost_samples)
			stats->total_lost_samples += event->lost_samples.lost;
		return tool->lost_samples(tool, event, sample, machine);
	}

	case PERF_RECORD_READ: {
		dump_read(evsel, event);
		return tool->read(tool, event, sample, evsel, machine);
	}

	case PERF_RECORD_THROTTLE:
		return tool->throttle(tool, event, sample, machine);

	case PERF_RECORD_UNTHROTTLE:
		return tool->unthrottle(tool, event, sample, machine);

	case PERF_RECORD_AUX: {
		if (tool->aux == perf_event__process_aux) {
			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
				stats->total_aux_lost += 1;
			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
				stats->total_aux_partial += 1;
		}
		return tool->aux(tool, event, sample, machine);
	}

	case PERF_RECORD_ITRACE_START:
		return tool->itrace_start(tool, event, sample, machine);

	case PERF_RECORD_SWITCH:
	case PERF_RECORD_SWITCH_CPU_WIDE:
		return tool->context_switch(tool, event, sample, machine);

	case PERF_RECORD_KSYMBOL:
		return tool->ksymbol(tool, event, sample, machine);

	case PERF_RECORD_BPF_EVENT:
		return tool->bpf(tool, event, sample, machine);

	default:
		++stats->nr_unknown_events;
		return -1;
	}
}

1463 1464 1465 1466 1467
/*
 * Parse @event into a perf_sample, give auxtrace a chance to process it, and
 * otherwise hand it to machines__deliver_event().
 *
 * Returns the parse error or a negative auxtrace error unchanged.  A positive
 * auxtrace return ends processing of this event with 0.
 */
static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	ret = perf_evlist__parse_sample(session->evlist, event, &sample);
	if (ret != 0) {
		pr_err("Can't parse sample, err = %d\n", ret);
		return ret;
	}

	ret = auxtrace__process_event(session, event, &sample, tool);
	if (ret != 0)
		return ret < 0 ? ret : 0;

	return machines__deliver_event(&session->machines, session->evlist,
				       event, &sample, tool, file_offset);
}

1487 1488 1489
/*
 * Process a user-space (synthesized) record immediately by calling the
 * matching perf_tool callback.
 *
 * The event is dumped up front, except for PERF_RECORD_COMPRESSED handled by
 * a non-stub callback, which is dumped only if the callback fails.
 * Unhandled record types return -EINVAL.
 */
static s64 perf_session__process_user_event(struct perf_session *session,
					    union perf_event *event,
					    u64 file_offset)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct perf_sample sample = { .time = 0, };
	int fd = perf_data__fd(session->data);
	bool dump_now;
	int err;

	dump_now = event->header.type != PERF_RECORD_COMPRESSED ||
		   tool->compressed == perf_session__process_compressed_event_stub;
	if (dump_now)
		dump_event(session->evlist, event, file_offset, &sample);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR: {
		err = tool->attr(tool, event, &session->evlist);
		if (err)
			return err;

		perf_session__set_id_hdr_size(session);
		perf_session__set_comm_exec(session);
		return err;
	}

	case PERF_RECORD_EVENT_UPDATE:
		return tool->event_update(tool, event, &session->evlist);

	case PERF_RECORD_HEADER_EVENT_TYPE:
		/*
		 * Deprecated, but we need to handle it for the sake
		 * of old data files created in pipe mode.
		 */
		return 0;

	case PERF_RECORD_HEADER_TRACING_DATA: {
		/* setup for reading amidst mmap */
		lseek(fd, file_offset, SEEK_SET);
		return tool->tracing_data(session, event);
	}

	case PERF_RECORD_HEADER_BUILD_ID:
		return tool->build_id(session, event);

	case PERF_RECORD_FINISHED_ROUND:
		return tool->finished_round(tool, event, oe);

	case PERF_RECORD_ID_INDEX:
		return tool->id_index(session, event);

	case PERF_RECORD_AUXTRACE_INFO:
		return tool->auxtrace_info(session, event);

	case PERF_RECORD_AUXTRACE: {
		/* setup for reading amidst mmap */
		lseek(fd, file_offset + event->header.size, SEEK_SET);
		return tool->auxtrace(session, event);
	}

	case PERF_RECORD_AUXTRACE_ERROR: {
		perf_session__auxtrace_error_inc(session, event);
		return tool->auxtrace_error(session, event);
	}

	case PERF_RECORD_THREAD_MAP:
		return tool->thread_map(session, event);

	case PERF_RECORD_CPU_MAP:
		return tool->cpu_map(session, event);

	case PERF_RECORD_STAT_CONFIG:
		return tool->stat_config(session, event);

	case PERF_RECORD_STAT:
		return tool->stat(session, event);

	case PERF_RECORD_STAT_ROUND:
		return tool->stat_round(session, event);

	case PERF_RECORD_TIME_CONV: {
		session->time_conv = event->time_conv;
		return tool->time_conv(session, event);
	}

	case PERF_RECORD_HEADER_FEATURE:
		return tool->feature(session, event);

	case PERF_RECORD_COMPRESSED: {
		err = tool->compressed(session, event, file_offset);
		if (err)
			dump_event(session->evlist, event, file_offset, &sample);
		return err;
	}

	default:
		return -EINVAL;
	}
}

1562 1563
int perf_session__deliver_synth_event(struct perf_session *session,
				      union perf_event *event,
1564
				      struct perf_sample *sample)
1565
{
1566
	struct evlist *evlist = session->evlist;
1567
	struct perf_tool *tool = session->tool;
1568 1569

	events_stats__inc(&evlist->stats, event->header.type);
1570 1571

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
1572
		return perf_session__process_user_event(session, event, 0);
1573

1574
	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
1575 1576
}

1577 1578 1579 1580 1581 1582 1583 1584 1585
static void event_swap(union perf_event *event, bool sample_id_all)
{
	perf_event__swap_op swap;

	swap = perf_event__swap_ops[event->header.type];
	if (swap)
		swap(event, sample_id_all);
}

1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600
int perf_session__peek_event(struct perf_session *session, off_t file_offset,
			     void *buf, size_t buf_sz,
			     union perf_event **event_ptr,
			     struct perf_sample *sample)
{
	union perf_event *event;
	size_t hdr_sz, rest;
	int fd;

	if (session->one_mmap && !session->header.needs_swap) {
		event = file_offset - session->one_mmap_offset +
			session->one_mmap_addr;
		goto out_parse_sample;
	}

1601
	if (perf_data__is_pipe(session->data))
1602 1603
		return -1;

1604
	fd = perf_data__fd(session->data);
1605 1606 1607 1608 1609 1610
	hdr_sz = sizeof(struct perf_event_header);

	if (buf_sz < hdr_sz)
		return -1;

	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
1611
	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
1612 1613 1614 1615 1616 1617 1618
		return -1;

	event = (union perf_event *)buf;

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

1619
	if (event->header.size < hdr_sz || event->header.size > buf_sz)
1620 1621 1622 1623
		return -1;

	rest = event->header.size - hdr_sz;

1624
	if (readn(fd, buf, rest) != (ssize_t)rest)
1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640
		return -1;

	if (session->header.needs_swap)
		event_swap(event, perf_evlist__sample_id_all(session->evlist));

out_parse_sample:

	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
	    perf_evlist__parse_sample(session->evlist, event, sample))
		return -1;

	*event_ptr = event;

	return 0;
}

1641
/*
 * Process one event read from the input.
 *
 * The payload is byte-swapped if needed, out-of-range types are rejected with
 * -EINVAL, and the event is counted.  User-type records are handled right
 * away.  Other records are queued by timestamp when the tool asks for
 * ordered events, and delivered directly when it does not or when queueing
 * returns -ETIME.
 */
static s64 perf_session__process_event(struct perf_session *session,
				       union perf_event *event, u64 file_offset)
{
	struct evlist *evlist = session->evlist;
	struct perf_tool *tool = session->tool;
	u64 timestamp = -1ULL;
	int ret;

	if (session->header.needs_swap)
		event_swap(event, perf_evlist__sample_id_all(evlist));

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	events_stats__inc(&evlist->stats, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, file_offset);

	if (!tool->ordered_events)
		return perf_session__deliver_event(session, event, tool, file_offset);

	/* A return of -1 from the timestamp parser is tolerated. */
	ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
	if (ret != 0 && ret != -1)
		return ret;

	ret = perf_session__queue_event(session, event, timestamp, file_offset);
	if (ret != -ETIME)
		return ret;

	/* Queueing returned -ETIME: deliver directly instead. */
	return perf_session__deliver_event(session, event, tool, file_offset);
}

1674
void perf_event_header__bswap(struct perf_event_header *hdr)
1675
{
1676 1677 1678
	hdr->type = bswap_32(hdr->type);
	hdr->misc = bswap_16(hdr->misc);
	hdr->size = bswap_16(hdr->size);
1679 1680
}

1681 1682
struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
1683
	return machine__findnew_thread(&session->machines.host, -1, pid);
1684 1685
}

1686 1687 1688 1689 1690 1691 1692
/*
 * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
 * So here a single thread is created for that, but actually there is a separate
 * idle task per cpu, so there should be one 'struct thread' per cpu, but there
 * is only 1. That causes problems for some tools, requiring workarounds. For
 * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
 */
1693
int perf_session__register_idle_thread(struct perf_session *session)
1694
{
1695
	struct thread *thread;
1696
	int err = 0;
1697

1698
	thread = machine__findnew_thread(&session->machines.host, 0, 0);
1699
	if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
1700
		pr_err("problem inserting idle task.\n");
1701
		err = -1;
1702 1703
	}

1704 1705 1706 1707 1708
	if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
		pr_err("problem inserting idle task.\n");
		err = -1;
	}

1709 1710 1711
	/* machine__findnew_thread() got the thread, so put it */
	thread__put(thread);
	return err;
1712 1713
}

1714 1715 1716 1717
/*
 * Warn about out-of-order events, unless any evsel in the session was
 * recorded with write_backward set.
 */
static void
perf_session__warn_order(const struct perf_session *session)
{
	const struct ordered_events *oe = &session->ordered_events;
	struct evsel *evsel;
	bool has_backward = false;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.write_backward)
			has_backward = true;
	}

	if (has_backward || oe->nr_unordered_events == 0)
		return;

	ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
}

1732
static void perf_session__warn_about_errors(const struct perf_session *session)
1733
{
1734 1735 1736
	const struct events_stats *stats = &session->evlist->stats;

	if (session->tool->lost == perf_event__process_lost &&
1737
	    stats->nr_events[PERF_RECORD_LOST] != 0) {
1738 1739
		ui__warning("Processed %d events and lost %d chunks!\n\n"
			    "Check IO/CPU overload!\n\n",
1740 1741
			    stats->nr_events[0],
			    stats->nr_events[PERF_RECORD_LOST]);
1742 1743
	}

1744 1745 1746 1747 1748 1749
	if (session->tool->lost_samples == perf_event__process_lost_samples) {
		double drop_rate;

		drop_rate = (double)stats->total_lost_samples /
			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
		if (drop_rate > 0.05) {
1750
			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
1751 1752 1753 1754 1755
				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
				    drop_rate * 100.0);
		}
	}

1756 1757 1758 1759 1760 1761 1762
	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_lost != 0) {
		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
			    stats->total_aux_lost,
			    stats->nr_events[PERF_RECORD_AUX]);
	}

1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779
	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_partial != 0) {
		bool vmm_exclusive = false;

		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
		                       &vmm_exclusive);

		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
		            "Are you running a KVM guest in the background?%s\n\n",
			    stats->total_aux_partial,
			    stats->nr_events[PERF_RECORD_AUX],
			    vmm_exclusive ?
			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
			    "will reduce the gaps to only guest's timeslices." :
			    "");
	}

1780
	if (stats->nr_unknown_events != 0) {
1781 1782 1783 1784 1785
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
1786
			    stats->nr_unknown_events);
1787 1788
	}

1789
	if (stats->nr_unknown_id != 0) {
1790
		ui__warning("%u samples with id not present in the header\n",
1791
			    stats->nr_unknown_id);
1792 1793
	}

1794
	if (stats->nr_invalid_chains != 0) {
1795 1796 1797
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
1798 1799
			    stats->nr_invalid_chains,
			    stats->nr_events[PERF_RECORD_SAMPLE]);
1800
	}
1801

1802
	if (stats->nr_unprocessable_samples != 0) {
1803 1804
		ui__warning("%u unprocessable samples recorded.\n"
			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
1805
			    stats->nr_unprocessable_samples);
1806
	}
1807

1808
	perf_session__warn_order(session);
1809 1810

	events_stats__auxtrace_error_warn(stats);
1811 1812 1813 1814 1815 1816

	if (stats->nr_proc_map_timeout != 0) {
		ui__warning("%d map information files for pre-existing threads were\n"
			    "not processed, if there are samples for addresses they\n"
			    "will not be resolved, you may find out which are these\n"
			    "threads by running with -v and redirecting the output\n"
1817 1818 1819
			    "to a file.\n"
			    "The time limit to process proc map is too short?\n"
			    "Increase it by --proc-map-timeout\n",
1820 1821
			    stats->nr_proc_map_timeout);
	}
1822 1823
}

1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836
/*
 * Per-thread callback for machines__for_each_thread(): flush @thread's
 * thread stack.  The opaque argument @p is unused.
 */
static int perf_session__flush_thread_stack(struct thread *thread,
					    void *p __maybe_unused)
{
	return thread_stack__flush(thread);
}

/*
 * Run perf_session__flush_thread_stack() on the session's machines via
 * machines__for_each_thread() and return its result.
 */
static int perf_session__flush_thread_stacks(struct perf_session *session)
{
	return machines__for_each_thread(&session->machines,
					 perf_session__flush_thread_stack,
					 NULL);
}

1837 1838
volatile int session_done;

1839 1840
static int __perf_session__process_decomp_events(struct perf_session *session);

1841
static int __perf_session__process_pipe_events(struct perf_session *session)
1842
{
1843
	struct ordered_events *oe = &session->ordered_events;
1844
	struct perf_tool *tool = session->tool;
1845
	int fd = perf_data__fd(session->data);
1846 1847 1848
	union perf_event *event;
	uint32_t size, cur_size = 0;
	void *buf = NULL;
1849
	s64 skip = 0;
1850
	u64 head;
1851
	ssize_t err;
1852 1853
	void *p;

1854
	perf_tool__fill_defaults(tool);
1855 1856

	head = 0;
1857 1858 1859 1860 1861
	cur_size = sizeof(union perf_event);

	buf = malloc(cur_size);
	if (!buf)
		return -errno;
1862
	ordered_events__set_copy_on_queue(oe, true);
1863
more:
1864
	event = buf;
1865
	err = readn(fd, event, sizeof(struct perf_event_header));
1866 1867 1868 1869 1870 1871 1872 1873
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

1874
	if (session->header.needs_swap)
1875
		perf_event_header__bswap(&event->header);
1876

1877
	size = event->header.size;
1878 1879 1880 1881
	if (size < sizeof(struct perf_event_header)) {
		pr_err("bad event header size\n");
		goto out_err;
	}
1882

1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
	if (size > cur_size) {
		void *new = realloc(buf, size);
		if (!new) {
			pr_err("failed to allocate memory to read event\n");
			goto out_err;
		}
		buf = new;
		cur_size = size;
		event = buf;
	}
	p = event;
1894 1895
	p += sizeof(struct perf_event_header);

1896
	if (size - sizeof(struct perf_event_header)) {
1897
		err = readn(fd, p, size - sizeof(struct perf_event_header));
1898 1899 1900 1901 1902
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}
1903

1904 1905 1906
			pr_err("failed to read event data\n");
			goto out_err;
		}
1907 1908
	}

1909
	if ((skip = perf_session__process_event(session, event, head)) < 0) {
1910
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
1911
		       head, event->header.size, event->header.type);
1912 1913
		err = -EINVAL;
		goto out_err;
1914 1915 1916 1917 1918 1919 1920
	}

	head += size;

	if (skip > 0)
		head += skip;

1921 1922 1923 1924
	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out_err;

1925 1926 1927
	if (!session_done())
		goto more;
done:
1928
	/* do the final flush for ordered samples */
1929
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
1930 1931 1932
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
1933 1934 1935
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
1936
out_err:
1937
	free(buf);
1938 1939
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
1940
	ordered_events__free(&session->ordered_events);
1941
	auxtrace__free_events(session);
1942 1943 1944
	return err;
}

1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962
/*
 * fetch_mmaped_event - locate the event record at offset @head of a window
 * @session: session being read; session->header.needs_swap selects whether
 *           the record header is byte-swapped in place
 * @head: offset of the record from the start of @buf
 * @mmap_size: number of bytes the caller treats as valid at @buf
 * @buf: start of the window
 *
 * Return: the event when both its header and the header.size bytes it
 * announces lie inside the window. NULL when the caller should move the
 * window and ask again: either the header itself does not fit, or the record
 * would fit in a window of the same size that starts at the page containing
 * @head. ERR_PTR(-EINVAL) when the announced size cannot fit even then,
 * which points at a corrupted or fuzzed perf.data.
 *
 * The header is left swapped only when the event is returned; on the other
 * two paths it is restored to file byte order so a retry does not swap twice.
 */
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;
	u64 event_size;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	event_size = event->header.size;
	if (head + event_size <= mmap_size)
		return event;

	/* We're not fetching the event so swap back again */
	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	/*
	 * reader__process_events() re-bases the window on the page that
	 * contains head, which leaves head % page_size bytes in front of the
	 * record. If the record fits behind that, ask for the remap instead of
	 * failing: with 32MB slices a valid record may simply straddle the
	 * end of the current slice. Right after such a remap head is already
	 * below page_size, so this test cannot succeed twice in a row for the
	 * same record and the caller cannot loop forever.
	 */
	if (event_size <= mmap_size - head % page_size)
		return NULL;

	pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx: fuzzed perf.data?\n",
		 __func__, head, (unsigned int)event_size, mmap_size);
	return ERR_PTR(-EINVAL);
}

1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986
/*
 * Return the event at decomp->head, or NULL when the decompressed buffer
 * does not (yet) hold all of it.
 *
 * Unlike a record that overruns a file mapping, a record cut off at the end
 * of a decompressed buffer is not an error:
 * perf_session__process_compressed_event() copies the unconsumed bytes
 * (size - head) of the previous buffer to the front of the next one, so the
 * record is completed once the next compressed record has been decompressed.
 *
 * The header is byte-swapped in place only when the event is returned; a
 * partial record is left in file byte order so it is not swapped twice.
 */
static union perf_event *
fetch_decomp_event(struct perf_session *session, struct decomp *decomp)
{
	char *buf = decomp->data;
	u64 head = decomp->head;
	union perf_event *event;

	if (head + sizeof(event->header) > decomp->size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size <= decomp->size)
		return event;

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	return NULL;
}

/*
 * Deliver every complete event held in the most recent decompressed buffer
 * (session->decomp_last), advancing decomp->head past each one.
 *
 * Stops early when session_done() reports true or when only a partial record
 * is left. Events are reported at decomp->file_pos, the file offset recorded
 * for the buffer, because decompressed events have no file offset of their
 * own.
 *
 * Return: 0 on success (including "nothing to do"), -EINVAL when a record is
 * smaller than an event header or perf_session__process_event() fails.
 */
static int __perf_session__process_decomp_events(struct perf_session *session)
{
	s64 skip;
	u64 size;
	struct decomp *decomp = session->decomp_last;

	if (!decomp)
		return 0;

	while (decomp->head < decomp->size && !session_done()) {
		union perf_event *event = fetch_decomp_event(session, decomp);

		if (!event)
			break;

		size = event->header.size;

		if (size < sizeof(struct perf_event_header) ||
		    (skip = perf_session__process_event(session, event, decomp->file_pos)) < 0) {
			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
				decomp->file_pos + decomp->head, event->header.size, event->header.type);
			return -EINVAL;
		}

		/* A positive return asks to step over extra payload after the record. */
		if (skip)
			size += skip;

		decomp->head += size;
	}

	return 0;
}

2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
/*
 * On 64bit we can mmap the data file in one go. No need for tiny mmap
 * slices. On 32bit we use 32MB.
 *
 * NUM_MMAPS sizes the mmaps[] ring in reader__process_events(), which wraps
 * its index with "& (ARRAY_SIZE(mmaps) - 1)", so it has to stay a power of
 * two.
 */
#if BITS_PER_LONG == 64
#define MMAP_SIZE ULLONG_MAX
#define NUM_MMAPS 1
#else
#define MMAP_SIZE (32 * 1024 * 1024ULL)
#define NUM_MMAPS 128
#endif

2023 2024 2025 2026 2027 2028
struct reader;

/*
 * Per-event callback used by reader__process_events(). It receives the
 * session, the fetched event and the event's offset in the data file.
 * A negative return value stops the read and is propagated as the error;
 * a positive one is added to the event size when stepping to the next
 * record.
 */
typedef s64 (*reader_cb_t)(struct perf_session *session,
			   union perf_event *event,
			   u64 file_offset);

J
Jiri Olsa 已提交
2029
struct reader {
2030 2031 2032 2033
	int		 fd;
	u64		 data_size;
	u64		 data_offset;
	reader_cb_t	 process;
J
Jiri Olsa 已提交
2034 2035
};

2036 2037 2038
static int
reader__process_events(struct reader *rd, struct perf_session *session,
		       struct ui_progress *prog)
2039
{
2040
	u64 data_size = rd->data_size;
2041
	u64 head, page_offset, file_offset, file_pos, size;
2042
	int err = 0, mmap_prot, mmap_flags, map_idx = 0;
2043
	size_t	mmap_size;
2044
	char *buf, *mmaps[NUM_MMAPS];
2045
	union perf_event *event;
2046
	s64 skip;
2047

2048
	page_offset = page_size * (rd->data_offset / page_size);
2049
	file_offset = page_offset;
2050
	head = rd->data_offset - page_offset;
2051

2052
	ui_progress__init_size(prog, data_size, "Processing events...");
2053

2054
	data_size += rd->data_offset;
2055

2056
	mmap_size = MMAP_SIZE;
2057 2058
	if (mmap_size > data_size) {
		mmap_size = data_size;
2059 2060
		session->one_mmap = true;
	}
2061

2062 2063
	memset(mmaps, 0, sizeof(mmaps));

2064 2065 2066
	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

2067
	if (session->header.needs_swap) {
2068 2069 2070
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
2071
remap:
2072
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd,
2073
		   file_offset);
2074 2075 2076
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
2077
		goto out;
2078
	}
2079 2080
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
2081
	file_pos = file_offset + head;
2082 2083 2084 2085
	if (session->one_mmap) {
		session->one_mmap_addr = buf;
		session->one_mmap_offset = file_offset;
	}
2086 2087

more:
2088
	event = fetch_mmaped_event(session, head, mmap_size, buf);
2089 2090 2091
	if (IS_ERR(event))
		return PTR_ERR(event);

2092
	if (!event) {
2093 2094 2095 2096
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}
2097

2098 2099 2100
		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
2101 2102 2103 2104 2105
		goto remap;
	}

	size = event->header.size;

2106 2107
	skip = -EINVAL;

2108
	if (size < sizeof(struct perf_event_header) ||
2109
	    (skip = rd->process(session, event, file_pos)) < 0) {
2110
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
2111
		       file_offset + head, event->header.size,
2112 2113
		       event->header.type, strerror(-skip));
		err = skip;
2114
		goto out;
2115 2116
	}

2117 2118 2119
	if (skip)
		size += skip;

2120
	head += size;
2121
	file_pos += size;
2122

2123 2124 2125 2126
	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out;

2127
	ui_progress__update(prog, size);
2128

2129
	if (session_done())
2130
		goto out;
2131

2132
	if (file_pos < data_size)
2133
		goto more;
2134

2135
out:
2136 2137 2138
	return err;
}

2139 2140 2141 2142 2143 2144 2145
/*
 * reader_cb_t implementation used for the session's main data section: hands
 * the event straight to perf_session__process_event() and returns its result.
 */
static s64 process_simple(struct perf_session *session,
			  union perf_event *event,
			  u64 file_offset)
{
	return perf_session__process_event(session, event, file_offset);
}

2146 2147 2148 2149 2150 2151
static int __perf_session__process_events(struct perf_session *session)
{
	struct reader rd = {
		.fd		= perf_data__fd(session->data),
		.data_size	= session->header.data_size,
		.data_offset	= session->header.data_offset,
2152
		.process	= process_simple,
2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168
	};
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct ui_progress prog;
	int err;

	perf_tool__fill_defaults(tool);

	if (rd.data_size == 0)
		return -1;

	ui_progress__init_size(&prog, rd.data_size, "Processing events...");

	err = reader__process_events(&rd, session, &prog);
	if (err)
		goto out_err;
2169
	/* do the final flush for ordered samples */
2170
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
2171 2172 2173
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
2174 2175 2176
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
2177
out_err:
N
Namhyung Kim 已提交
2178
	ui_progress__finish();
2179 2180
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
2181 2182 2183 2184 2185
	/*
	 * We may switching perf.data output, make ordered_events
	 * reusable.
	 */
	ordered_events__reinit(&session->ordered_events);
2186
	auxtrace__free_events(session);
2187
	session->one_mmap = false;
2188 2189
	return err;
}
2190

2191
int perf_session__process_events(struct perf_session *session)
2192
{
2193
	if (perf_session__register_idle_thread(session) < 0)
2194 2195
		return -ENOMEM;

2196 2197
	if (perf_data__is_pipe(session->data))
		return __perf_session__process_pipe_events(session);
2198

2199
	return __perf_session__process_events(session);
2200 2201
}

2202
bool perf_session__has_traces(struct perf_session *session, const char *msg)
2203
{
2204
	struct evsel *evsel;
2205

2206
	evlist__for_each_entry(session->evlist, evsel) {
2207
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
2208
			return true;
2209 2210
	}

2211 2212
	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
	return false;
2213
}
2214

2215
/*
 * map__set_kallsyms_ref_reloc_sym - attach a reference relocation symbol to @map
 * @map: map whose kmap (as returned by map__kmap()) receives the symbol
 * @symbol_name: symbol name; it is copied and cut at the first ']'
 * @addr: address stored with the symbol
 *
 * Return: 0 on success, -ENOMEM when an allocation fails. When @map has no
 * kmap there is nowhere to store the symbol; it is released again and 0 is
 * still returned, as before.
 */
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
	char *name, *bracket;
	struct ref_reloc_sym *ref;
	struct kmap *kmap;

	ref = zalloc(sizeof(*ref));
	if (ref == NULL)
		return -ENOMEM;

	name = strdup(symbol_name);
	if (name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(name, ']');
	if (bracket)
		*bracket = '\0';

	ref->name = name;
	ref->addr = addr;

	kmap = map__kmap(map);
	if (!kmap) {
		/* Nobody takes ownership of ref, so do not leak it. */
		free(name);
		free(ref);
		return 0;
	}

	kmap->ref_reloc_sym = ref;

	return 0;
}
2243

2244
size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
2245
{
2246
	return machines__fprintf_dsos(&session->machines, fp);
2247
}
2248

2249
size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
2250
					  bool (skip)(struct dso *dso, int parm), int parm)
2251
{
2252
	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
2253
}
2254 2255 2256

/*
 * Print the "Aggregated stats" heading followed by the evlist's event
 * statistics. The heading carries a note when the file has the
 * HEADER_AUXTRACE feature. Returns the sum of both print results.
 */
size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	const char *msg;
	size_t printed;

	msg = perf_header__has_feat(&session->header, HEADER_AUXTRACE) ?
	      " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)" :
	      "";

	printed  = fprintf(fp, "\nAggregated stats:%s\n", msg);
	printed += events_stats__fprintf(&session->evlist->stats, fp);

	return printed;
}
2268

2269 2270 2271 2272 2273 2274
size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
	/*
	 * FIXME: Here we have to actually print all the machines in this
	 * session, not just the host...
	 */
2275
	return machine__fprintf(&session->machines.host, fp);
2276 2277
}

2278
struct evsel *perf_session__find_first_evtype(struct perf_session *session,
2279 2280
					      unsigned int type)
{
2281
	struct evsel *pos;
2282

2283
	evlist__for_each_entry(session->evlist, pos) {
2284
		if (pos->core.attr.type == type)
2285 2286 2287 2288 2289
			return pos;
	}
	return NULL;
}

2290 2291 2292
int perf_session__cpu_bitmap(struct perf_session *session,
			     const char *cpu_list, unsigned long *cpu_bitmap)
{
2293
	int i, err = -1;
2294
	struct perf_cpu_map *map;
2295 2296

	for (i = 0; i < PERF_TYPE_MAX; ++i) {
2297
		struct evsel *evsel;
2298 2299 2300 2301 2302

		evsel = perf_session__find_first_evtype(session, i);
		if (!evsel)
			continue;

2303
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
2304
			pr_err("File does not contain CPU events. "
2305
			       "Remove -C option to proceed.\n");
2306 2307 2308 2309
			return -1;
		}
	}

2310
	map = perf_cpu_map__new(cpu_list);
2311 2312 2313 2314
	if (map == NULL) {
		pr_err("Invalid cpu_list\n");
		return -1;
	}
2315 2316 2317 2318 2319 2320 2321

	for (i = 0; i < map->nr; i++) {
		int cpu = map->map[i];

		if (cpu >= MAX_NR_CPUS) {
			pr_err("Requested CPU %d too large. "
			       "Consider raising MAX_NR_CPUS\n", cpu);
2322
			goto out_delete_map;
2323 2324 2325 2326 2327
		}

		set_bit(cpu, cpu_bitmap);
	}

2328 2329 2330
	err = 0;

out_delete_map:
2331
	perf_cpu_map__put(map);
2332
	return err;
2333
}
2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344

/*
 * Print the header information of the session to fp, framed by
 * "# ========" lines. Does nothing when session or fp is NULL.
 */
void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
				bool full)
{
	if (!session || !fp)
		return;

	fputs("# ========\n", fp);
	perf_header__fprintf_info(session, fp, full);
	fputs("# ========\n#\n", fp);
}
2345 2346 2347


int __perf_session__set_tracepoints_handlers(struct perf_session *session,
2348
					     const struct evsel_str_handler *assocs,
2349 2350
					     size_t nr_assocs)
{
2351
	struct evsel *evsel;
2352 2353 2354 2355
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
2356 2357 2358 2359 2360
		/*
		 * Adding a handler for an event not in the session,
		 * just ignore it.
		 */
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
2361
		if (evsel == NULL)
2362
			continue;
2363 2364

		err = -EEXIST;
2365
		if (evsel->handler != NULL)
2366
			goto out;
2367
		evsel->handler = assocs[i].handler;
2368 2369 2370 2371 2372 2373
	}

	err = 0;
out:
	return err;
}
A
Adrian Hunter 已提交
2374

2375 2376
int perf_event__process_id_index(struct perf_session *session,
				 union perf_event *event)
A
Adrian Hunter 已提交
2377
{
2378
	struct evlist *evlist = session->evlist;
2379
	struct perf_record_id_index *ie = &event->id_index;
A
Adrian Hunter 已提交
2380 2381
	size_t i, nr, max_nr;

2382
	max_nr = (ie->header.size - sizeof(struct perf_record_id_index)) /
A
Adrian Hunter 已提交
2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395
		 sizeof(struct id_index_entry);
	nr = ie->nr;
	if (nr > max_nr)
		return -EINVAL;

	if (dump_trace)
		fprintf(stdout, " nr: %zu\n", nr);

	for (i = 0; i < nr; i++) {
		struct id_index_entry *e = &ie->entries[i];
		struct perf_sample_id *sid;

		if (dump_trace) {
2396 2397 2398 2399
			fprintf(stdout,	" ... id: %"PRI_lu64, e->id);
			fprintf(stdout,	"  idx: %"PRI_lu64, e->idx);
			fprintf(stdout,	"  cpu: %"PRI_ld64, e->cpu);
			fprintf(stdout,	"  tid: %"PRI_ld64"\n", e->tid);
A
Adrian Hunter 已提交
2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413
		}

		sid = perf_evlist__id2sid(evlist, e->id);
		if (!sid)
			return -ENOENT;
		sid->idx = e->idx;
		sid->cpu = e->cpu;
		sid->tid = e->tid;
	}
	return 0;
}

/*
 * Synthesize PERF_RECORD_ID_INDEX records that describe every sample id of
 * evlist and hand them to process().
 *
 * header.size has to fit UINT16_MAX, so one record carries a limited number
 * of entries: each time the record is full it is emitted and refilled from
 * entry 0, and a last record carries whatever is left.
 *
 * Returns 0 on success, -ENOMEM when the record cannot be allocated, -ENOENT
 * when an id has no perf_sample_id in evlist, or the first non-zero value
 * returned by process().
 */
int perf_event__synthesize_id_index(struct perf_tool *tool,
				    perf_event__handler_t process,
				    struct evlist *evlist,
				    struct machine *machine)
{
	const size_t hdr_sz = sizeof(struct perf_record_id_index);
	const size_t entry_sz = sizeof(struct id_index_entry);
	const size_t max_nr = (UINT16_MAX - hdr_sz) / entry_sz;
	size_t remaining = 0, filled = 0, chunk;
	union perf_event *ev;
	struct evsel *evsel;
	int err;

	pr_debug2("Synthesizing id index\n");

	evlist__for_each_entry(evlist, evsel)
		remaining += evsel->ids;

	/* Capacity of one record: everything if it fits, else the maximum. */
	chunk = remaining > max_nr ? max_nr : remaining;

	ev = zalloc(hdr_sz + chunk * entry_sz);
	if (!ev)
		return -ENOMEM;

	ev->id_index.header.type = PERF_RECORD_ID_INDEX;
	ev->id_index.header.size = hdr_sz + chunk * entry_sz;
	ev->id_index.nr = chunk;

	evlist__for_each_entry(evlist, evsel) {
		u32 j;

		for (j = 0; j < evsel->ids; j++) {
			struct perf_sample_id *sid;
			struct id_index_entry *e;

			/* Record full: emit it and start over at entry 0. */
			if (filled >= chunk) {
				err = process(tool, ev, NULL, machine);
				if (err)
					goto out_err;
				remaining -= chunk;
				filled = 0;
			}

			e = &ev->id_index.entries[filled++];

			e->id = evsel->id[j];

			sid = perf_evlist__id2sid(evlist, e->id);
			if (!sid) {
				err = -ENOENT;
				goto out_err;
			}

			e->idx = sid->idx;
			e->cpu = sid->cpu;
			e->tid = sid->tid;
		}
	}

	/* Last record: shrink it to the entries that are still pending. */
	ev->id_index.header.size = hdr_sz + remaining * entry_sz;
	ev->id_index.nr = remaining;

	err = process(tool, ev, NULL, machine);
out_err:
	free(ev);

	return err;
}