// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <perf/cpumap.h>

#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
#include "map.h"
#include "symbol.h"
#include "session.h"
#include "tool.h"
#include "perf_regs.h"
#include "asm/bug.h"
#include "auxtrace.h"
#include "thread.h"
#include "thread-stack.h"
#include "sample-raw.h"
#include "stat.h"
#include "util.h"
#include "ui/progress.h"
#include "../perf.h"
#include "arch/common.h"
#include <internal/lib.h>

#ifdef HAVE_ZSTD_SUPPORT
static int perf_session__process_compressed_event(struct perf_session *session,
						  union perf_event *event, u64 file_offset)
{
	void *src;
	size_t decomp_size, src_size;
	u64 decomp_last_rem = 0;
	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
	struct decomp *decomp, *decomp_last = session->decomp_last;

	if (decomp_last) {
		decomp_last_rem = decomp_last->size - decomp_last->head;
		decomp_len += decomp_last_rem;
	}

	mmap_len = sizeof(struct decomp) + decomp_len;
	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	if (decomp == MAP_FAILED) {
		pr_err("Couldn't allocate memory for decompression\n");
		return -1;
	}

	decomp->file_pos = file_offset;
	decomp->mmap_len = mmap_len;
	decomp->head = 0;

	if (decomp_last_rem) {
		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
		decomp->size = decomp_last_rem;
	}

	src = (void *)event + sizeof(struct perf_record_compressed);
	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);

	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
	if (!decomp_size) {
		munmap(decomp, mmap_len);
		pr_err("Couldn't decompress data\n");
		return -1;
	}

	decomp->size += decomp_size;

	if (session->decomp == NULL) {
		session->decomp = decomp;
		session->decomp_last = decomp;
	} else {
		session->decomp_last->next = decomp;
		session->decomp_last = decomp;
	}

	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);

	return 0;
}
#else /* !HAVE_ZSTD_SUPPORT */
#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
#endif

static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset);

static int perf_session__open(struct perf_session *session)
{
	struct perf_data *data = session->data;

	if (perf_session__read_header(session) < 0) {
		pr_err("incompatible file format (rerun with -v to learn more)\n");
		return -1;
	}

	if (perf_data__is_pipe(data))
		return 0;

	if (perf_header__has_feat(&session->header, HEADER_STAT))
		return 0;

	if (!perf_evlist__valid_sample_type(session->evlist)) {
		pr_err("non matching sample_type\n");
		return -1;
	}

	if (!perf_evlist__valid_sample_id_all(session->evlist)) {
		pr_err("non matching sample_id_all\n");
		return -1;
	}

	if (!perf_evlist__valid_read_format(session->evlist)) {
		pr_err("non matching read_format\n");
		return -1;
	}

	return 0;
}

void perf_session__set_id_hdr_size(struct perf_session *session)
{
	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);

	machines__set_id_hdr_size(&session->machines, id_hdr_size);
}

int perf_session__create_kernel_maps(struct perf_session *session)
{
	int ret = machine__create_kernel_maps(&session->machines.host);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&session->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *session)
{
	machines__destroy_kernel_maps(&session->machines);
}

static bool perf_session__has_comm_exec(struct perf_session *session)
{
	struct evsel *evsel;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.comm_exec)
			return true;
	}

	return false;
}

static void perf_session__set_comm_exec(struct perf_session *session)
{
	bool comm_exec = perf_session__has_comm_exec(session);

	machines__set_comm_exec(&session->machines, comm_exec);
}

static int ordered_events__deliver_event(struct ordered_events *oe,
					 struct ordered_event *event)
{
	struct perf_session *session = container_of(oe, struct perf_session,
						    ordered_events);

	return perf_session__deliver_event(session, event->event,
					   session->tool, event->file_offset);
}

struct perf_session *perf_session__new(struct perf_data *data,
				       bool repipe, struct perf_tool *tool)
{
	int ret = -ENOMEM;
	struct perf_session *session = zalloc(sizeof(*session));

	if (!session)
		goto out;

	session->repipe = repipe;
	session->tool   = tool;
	INIT_LIST_HEAD(&session->auxtrace_index);
	machines__init(&session->machines);
	ordered_events__init(&session->ordered_events,
			     ordered_events__deliver_event, NULL);

	perf_env__init(&session->header.env);
	if (data) {
		ret = perf_data__open(data);
		if (ret < 0)
			goto out_delete;

		session->data = data;

		if (perf_data__is_read(data)) {
			ret = perf_session__open(session);
			if (ret < 0)
				goto out_delete;

			/*
			 * set session attributes that are present in perf.data
			 * but not in pipe-mode.
			 */
			if (!data->is_pipe) {
				perf_session__set_id_hdr_size(session);
				perf_session__set_comm_exec(session);
			}

			perf_evlist__init_trace_event_sample_raw(session->evlist);

			/* Open the directory data. */
			if (data->is_dir) {
				ret = perf_data__open_dir(data);
				if (ret)
					goto out_delete;
			}
		}
	} else {
		session->machines.host.env = &perf_env;
	}

	session->machines.host.single_address_space =
		perf_env__single_address_space(session->machines.host.env);

	if (!data || perf_data__is_write(data)) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(session) < 0)
			pr_warning("Cannot read kernel map\n");
	}

	/*
	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
	 * processed, so perf_evlist__sample_id_all is not meaningful here.
	 */
	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
	    tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		tool->ordered_events = false;
	}

	return session;

 out_delete:
	perf_session__delete(session);
 out:
	return ERR_PTR(ret);
}
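
/*
 * Illustrative usage sketch (not part of the original file): since
 * perf_session__new() returns ERR_PTR() on failure, a caller is expected
 * to check the result with the <linux/err.h> helpers, roughly:
 *
 *	struct perf_session *session = perf_session__new(&data, false, &tool);
 *
 *	if (IS_ERR(session))
 *		return PTR_ERR(session);
 */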

static void perf_session__delete_threads(struct perf_session *session)
{
	machine__delete_threads(&session->machines.host);
}

static void perf_session__release_decomp_events(struct perf_session *session)
{
	struct decomp *next, *decomp;
	size_t mmap_len;
	next = session->decomp;
	do {
		decomp = next;
		if (decomp == NULL)
			break;
		next = decomp->next;
		mmap_len = decomp->mmap_len;
		munmap(decomp, mmap_len);
	} while (1);
}

void perf_session__delete(struct perf_session *session)
{
	if (session == NULL)
		return;
	auxtrace__free(session);
	auxtrace_index__free(&session->auxtrace_index);
	perf_session__destroy_kernel_maps(session);
	perf_session__delete_threads(session);
	perf_session__release_decomp_events(session);
	perf_env__exit(&session->header.env);
	machines__exit(&session->machines);
	if (session->data)
		perf_data__close(session->data);
	free(session);
}

static int process_event_synth_tracing_data_stub(struct perf_session *session
						 __maybe_unused,
						 union perf_event *event
						 __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
					 union perf_event *event __maybe_unused,
					 struct evlist **pevlist
					 __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
						 union perf_event *event __maybe_unused,
						 struct evlist **pevlist
						 __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_event_update(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample __maybe_unused,
				     struct evsel *evsel __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(struct perf_tool *tool __maybe_unused,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
				       union perf_event *event __maybe_unused,
				       struct ordered_events *oe __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event,
				  struct ordered_events *oe);

static int skipn(int fd, off_t n)
{
	char buf[4096];
	ssize_t ret;

	while (n > 0) {
		ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
		if (ret <= 0)
			return ret;
		n -= ret;
	}

	return 0;
}

static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
				       union perf_event *event)
{
	dump_printf(": unhandled!\n");
	if (perf_data__is_pipe(session->data))
		skipn(perf_data__fd(session->data), event->auxtrace.size);
	return event->auxtrace.size;
}

static int process_event_op2_stub(struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_thread_map(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
			       union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_cpu_map(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
				   union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_stat_config(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
			     union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
				   union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat_round(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
						       union perf_event *event __maybe_unused,
						       u64 file_offset __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

void perf_tool__fill_defaults(struct perf_tool *tool)
{
	if (tool->sample == NULL)
		tool->sample = process_event_sample_stub;
	if (tool->mmap == NULL)
		tool->mmap = process_event_stub;
	if (tool->mmap2 == NULL)
		tool->mmap2 = process_event_stub;
	if (tool->comm == NULL)
		tool->comm = process_event_stub;
	if (tool->namespaces == NULL)
		tool->namespaces = process_event_stub;
	if (tool->fork == NULL)
		tool->fork = process_event_stub;
	if (tool->exit == NULL)
		tool->exit = process_event_stub;
	if (tool->lost == NULL)
		tool->lost = perf_event__process_lost;
	if (tool->lost_samples == NULL)
		tool->lost_samples = perf_event__process_lost_samples;
	if (tool->aux == NULL)
		tool->aux = perf_event__process_aux;
	if (tool->itrace_start == NULL)
		tool->itrace_start = perf_event__process_itrace_start;
	if (tool->context_switch == NULL)
		tool->context_switch = perf_event__process_switch;
	if (tool->ksymbol == NULL)
		tool->ksymbol = perf_event__process_ksymbol;
	if (tool->bpf == NULL)
		tool->bpf = perf_event__process_bpf;
	if (tool->read == NULL)
		tool->read = process_event_sample_stub;
	if (tool->throttle == NULL)
		tool->throttle = process_event_stub;
	if (tool->unthrottle == NULL)
		tool->unthrottle = process_event_stub;
	if (tool->attr == NULL)
		tool->attr = process_event_synth_attr_stub;
	if (tool->event_update == NULL)
		tool->event_update = process_event_synth_event_update_stub;
	if (tool->tracing_data == NULL)
		tool->tracing_data = process_event_synth_tracing_data_stub;
	if (tool->build_id == NULL)
		tool->build_id = process_event_op2_stub;
	if (tool->finished_round == NULL) {
		if (tool->ordered_events)
			tool->finished_round = process_finished_round;
		else
			tool->finished_round = process_finished_round_stub;
	}
	if (tool->id_index == NULL)
		tool->id_index = process_event_op2_stub;
	if (tool->auxtrace_info == NULL)
		tool->auxtrace_info = process_event_op2_stub;
	if (tool->auxtrace == NULL)
		tool->auxtrace = process_event_auxtrace_stub;
	if (tool->auxtrace_error == NULL)
		tool->auxtrace_error = process_event_op2_stub;
	if (tool->thread_map == NULL)
		tool->thread_map = process_event_thread_map_stub;
	if (tool->cpu_map == NULL)
		tool->cpu_map = process_event_cpu_map_stub;
	if (tool->stat_config == NULL)
		tool->stat_config = process_event_stat_config_stub;
	if (tool->stat == NULL)
		tool->stat = process_stat_stub;
	if (tool->stat_round == NULL)
		tool->stat_round = process_stat_round_stub;
	if (tool->time_conv == NULL)
		tool->time_conv = process_event_op2_stub;
	if (tool->feature == NULL)
		tool->feature = process_event_op2_stub;
	if (tool->compressed == NULL)
		tool->compressed = perf_session__process_compressed_event;
}
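
/*
 * Illustrative sketch (not part of the original file): a consumer only
 * has to set the callbacks it cares about and can leave the rest to
 * perf_tool__fill_defaults(), roughly:
 *
 *	struct perf_tool tool = {
 *		.sample         = my_process_sample,	// hypothetical callback
 *		.ordered_events = true,
 *	};
 *	perf_tool__fill_defaults(&tool);	// unset handlers become stubs
 */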

static void swap_sample_id_all(union perf_event *event, void *data)
{
	void *end = (void *) event + event->header.size;
	int size = end - data;

	BUG_ON(size % sizeof(u64));
	mem_bswap_64(data, size);
}

static void perf_event__all64_swap(union perf_event *event,
				   bool sample_id_all __maybe_unused)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);

	if (sample_id_all) {
		void *data = &event->comm.comm;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__mmap_swap(union perf_event *event,
				  bool sample_id_all)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);

	if (sample_id_all) {
		void *data = &event->mmap.filename;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__mmap2_swap(union perf_event *event,
				  bool sample_id_all)
{
	event->mmap2.pid   = bswap_32(event->mmap2.pid);
	event->mmap2.tid   = bswap_32(event->mmap2.tid);
	event->mmap2.start = bswap_64(event->mmap2.start);
	event->mmap2.len   = bswap_64(event->mmap2.len);
	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
	event->mmap2.maj   = bswap_32(event->mmap2.maj);
	event->mmap2.min   = bswap_32(event->mmap2.min);
	event->mmap2.ino   = bswap_64(event->mmap2.ino);

	if (sample_id_all) {
		void *data = &event->mmap2.filename;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);

	if (sample_id_all)
		swap_sample_id_all(event, &event->fork + 1);
}

static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);

	if (sample_id_all)
		swap_sample_id_all(event, &event->read + 1);
}

static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
{
	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
	event->aux.aux_size   = bswap_64(event->aux.aux_size);
	event->aux.flags      = bswap_64(event->aux.flags);

	if (sample_id_all)
		swap_sample_id_all(event, &event->aux + 1);
}

static void perf_event__itrace_start_swap(union perf_event *event,
					  bool sample_id_all)
{
	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);

	if (sample_id_all)
		swap_sample_id_all(event, &event->itrace_start + 1);
}

static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
{
	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
		event->context_switch.next_prev_pid =
				bswap_32(event->context_switch.next_prev_pid);
		event->context_switch.next_prev_tid =
				bswap_32(event->context_switch.next_prev_tid);
	}

	if (sample_id_all)
		swap_sample_id_all(event, &event->context_switch + 1);
}

static void perf_event__throttle_swap(union perf_event *event,
				      bool sample_id_all)
{
	event->throttle.time	  = bswap_64(event->throttle.time);
	event->throttle.id	  = bswap_64(event->throttle.id);
	event->throttle.stream_id = bswap_64(event->throttle.stream_id);

	if (sample_id_all)
		swap_sample_id_all(event, &event->throttle + 1);
}

static void perf_event__namespaces_swap(union perf_event *event,
					bool sample_id_all)
{
	u64 i;

	event->namespaces.pid		= bswap_32(event->namespaces.pid);
	event->namespaces.tid		= bswap_32(event->namespaces.tid);
	event->namespaces.nr_namespaces	= bswap_64(event->namespaces.nr_namespaces);

	for (i = 0; i < event->namespaces.nr_namespaces; i++) {
		struct perf_ns_link_info *ns = &event->namespaces.link_info[i];

		ns->dev = bswap_64(ns->dev);
		ns->ino = bswap_64(ns->ino);
	}

	if (sample_id_all)
		swap_sample_id_all(event, &event->namespaces.link_info[i]);
}

static u8 revbyte(u8 b)
{
	int rev = (b >> 4) | ((b & 0xf) << 4);
	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
	return (u8) rev;
}

/*
 * XXX this is a hack in an attempt to carry the flags bitfield
 * through endian village. ABI says:
 *
 * Bit-fields are allocated from right to left (least to most significant)
 * on little-endian implementations and from left to right (most to least
 * significant) on big-endian implementations.
 *
 * The above seems to be byte specific, so we need to reverse each
 * byte of the bitfield. 'Internet' also says this might be implementation
 * specific and we probably need a proper fix and carry perf_event_attr
 * bitfield flags in a separate data file FEAT_ section. Though this seems
 * to work for now.
 */
static void swap_bitfield(u8 *p, unsigned len)
{
	unsigned i;

	for (i = 0; i < len; i++) {
		*p = revbyte(*p);
		p++;
	}
}
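
/*
 * Worked example (added for illustration): revbyte() mirrors the bits of
 * one byte, so revbyte(0xb1) == 0x8d (10110001 -> 10001101), and
 * swap_bitfield() applies that to each byte independently; applying it
 * twice restores the original bytes.
 */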

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);

#define bswap_safe(f, n) 					\
	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
		       sizeof(attr->f) * (n)))
#define bswap_field(f, sz) 			\
do { 						\
	if (bswap_safe(f, 0))			\
		attr->f = bswap_##sz(attr->f);	\
} while(0)
#define bswap_field_16(f) bswap_field(f, 16)
#define bswap_field_32(f) bswap_field(f, 32)
#define bswap_field_64(f) bswap_field(f, 64)

	bswap_field_64(config);
	bswap_field_64(sample_period);
	bswap_field_64(sample_type);
	bswap_field_64(read_format);
	bswap_field_32(wakeup_events);
	bswap_field_32(bp_type);
	bswap_field_64(bp_addr);
	bswap_field_64(bp_len);
	bswap_field_64(branch_sample_type);
	bswap_field_64(sample_regs_user);
	bswap_field_32(sample_stack_user);
	bswap_field_32(aux_watermark);
	bswap_field_16(sample_max_stack);

	/*
	 * After read_format are bitfields. Check read_format because
	 * we are unable to use offsetof on bitfield.
	 */
	if (bswap_safe(read_format, 1))
		swap_bitfield((u8 *) (&attr->read_format + 1),
			      sizeof(u64));
#undef bswap_field_64
#undef bswap_field_32
#undef bswap_field
#undef bswap_safe
}
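
/*
 * Illustrative note (not in the original): bswap_safe() keeps us from
 * touching fields that lie beyond the attr->size actually written to the
 * file, so an attr recorded by an older perf with a smaller
 * perf_event_attr simply skips swapping the newer trailing fields.
 */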

static void perf_event__hdr_attr_swap(union perf_event *event,
				      bool sample_id_all __maybe_unused)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_update_swap(union perf_event *event,
					  bool sample_id_all __maybe_unused)
{
	event->event_update.type = bswap_64(event->event_update.type);
	event->event_update.id   = bswap_64(event->event_update.id);
}

static void perf_event__event_type_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event,
					  bool sample_id_all __maybe_unused)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

static void perf_event__auxtrace_info_swap(union perf_event *event,
					   bool sample_id_all __maybe_unused)
{
	size_t size;

	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);

	size = event->header.size;
	size -= (void *)&event->auxtrace_info.priv - (void *)event;
	mem_bswap_64(event->auxtrace_info.priv, size);
}

static void perf_event__auxtrace_swap(union perf_event *event,
				      bool sample_id_all __maybe_unused)
{
	event->auxtrace.size      = bswap_64(event->auxtrace.size);
	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
}

static void perf_event__auxtrace_error_swap(union perf_event *event,
					    bool sample_id_all __maybe_unused)
{
	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
	event->auxtrace_error.fmt  = bswap_32(event->auxtrace_error.fmt);
	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
	if (event->auxtrace_error.fmt)
		event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
}

static void perf_event__thread_map_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	unsigned i;

	event->thread_map.nr = bswap_64(event->thread_map.nr);

	for (i = 0; i < event->thread_map.nr; i++)
		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
}

static void perf_event__cpu_map_swap(union perf_event *event,
				     bool sample_id_all __maybe_unused)
{
	struct perf_record_cpu_map_data *data = &event->cpu_map.data;
	struct cpu_map_entries *cpus;
	struct perf_record_record_cpu_map *mask;
	unsigned i;

	data->type = bswap_64(data->type);

	switch (data->type) {
	case PERF_CPU_MAP__CPUS:
		cpus = (struct cpu_map_entries *)data->data;

		cpus->nr = bswap_16(cpus->nr);

		for (i = 0; i < cpus->nr; i++)
			cpus->cpu[i] = bswap_16(cpus->cpu[i]);
		break;
	case PERF_CPU_MAP__MASK:
		mask = (struct perf_record_record_cpu_map *)data->data;

		mask->nr = bswap_16(mask->nr);
		mask->long_size = bswap_16(mask->long_size);

		switch (mask->long_size) {
		case 4: mem_bswap_32(&mask->mask, mask->nr); break;
		case 8: mem_bswap_64(&mask->mask, mask->nr); break;
		default:
			pr_err("cpu_map swap: unsupported long size\n");
		}
	default:
		break;
	}
}

static void perf_event__stat_config_swap(union perf_event *event,
					 bool sample_id_all __maybe_unused)
{
	u64 size;

	size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
	size += 1; /* nr item itself */
	mem_bswap_64(&event->stat_config.nr, size);
}

static void perf_event__stat_swap(union perf_event *event,
				  bool sample_id_all __maybe_unused)
{
	event->stat.id     = bswap_64(event->stat.id);
	event->stat.thread = bswap_32(event->stat.thread);
	event->stat.cpu    = bswap_32(event->stat.cpu);
	event->stat.val    = bswap_64(event->stat.val);
	event->stat.ena    = bswap_64(event->stat.ena);
	event->stat.run    = bswap_64(event->stat.run);
}

static void perf_event__stat_round_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	event->stat_round.type = bswap_64(event->stat_round.type);
	event->stat_round.time = bswap_64(event->stat_round.time);
}

typedef void (*perf_event__swap_op)(union perf_event *event,
				    bool sample_id_all);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
	[PERF_RECORD_TIME_CONV]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(struct perf_tool *tool __maybe_unused,
				  union perf_event *event __maybe_unused,
				  struct ordered_events *oe)
{
	if (dump_trace)
		fprintf(stdout, "\n");
	return ordered_events__flush(oe, OE_FLUSH__ROUND);
}

int perf_session__queue_event(struct perf_session *s, union perf_event *event,
			      u64 timestamp, u64 file_offset)
{
	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
}

static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
	struct ip_callchain *callchain = sample->callchain;
	struct branch_stack *lbr_stack = sample->branch_stack;
	u64 kernel_callchain_nr = callchain->nr;
	unsigned int i;

	for (i = 0; i < kernel_callchain_nr; i++) {
		if (callchain->ips[i] == PERF_CONTEXT_USER)
			break;
	}

	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
		u64 total_nr;
		/*
		 * LBR callstack can only get user call chain,
		 * i is kernel call chain number,
		 * 1 is PERF_CONTEXT_USER.
		 *
		 * The user call chain is stored in LBR registers.
		 * LBR are pair registers. The caller is stored
		 * in "from" register, while the callee is stored
		 * in "to" register.
		 * For example, there is a call stack
		 * "A"->"B"->"C"->"D".
		 * The LBR registers will record like
		 * "C"->"D", "B"->"C", "A"->"B".
		 * So only the first "to" register and all "from"
		 * registers are needed to construct the whole stack.
		 */
		total_nr = i + 1 + lbr_stack->nr + 1;
		kernel_callchain_nr = i + 1;

		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);

		for (i = 0; i < kernel_callchain_nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       i, callchain->ips[i]);

		printf("..... %2d: %016" PRIx64 "\n",
		       (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
		for (i = 0; i < lbr_stack->nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
	}
}

static void callchain__printf(struct evsel *evsel,
			      struct perf_sample *sample)
{
	unsigned int i;
	struct ip_callchain *callchain = sample->callchain;

	if (perf_evsel__has_branch_callstack(evsel))
		callchain__lbr_callstack_printf(sample);

	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);

	for (i = 0; i < callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, callchain->ips[i]);
}

static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
	uint64_t i;

	printf("%s: nr:%" PRIu64 "\n",
		!callstack ? "... branch stack" : "... branch callstack",
		sample->branch_stack->nr);

	for (i = 0; i < sample->branch_stack->nr; i++) {
		struct branch_entry *e = &sample->branch_stack->entries[i];

		if (!callstack) {
			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
				i, e->from, e->to,
				(unsigned short)e->flags.cycles,
				e->flags.mispred ? "M" : " ",
				e->flags.predicted ? "P" : " ",
				e->flags.abort ? "A" : " ",
				e->flags.in_tx ? "T" : " ",
				(unsigned)e->flags.reserved);
		} else {
			printf("..... %2"PRIu64": %016" PRIx64 "\n",
				i, i > 0 ? e->from : e->to);
		}
	}
}

static void regs_dump__printf(u64 mask, u64 *regs)
{
	unsigned rid, i = 0;

	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
		u64 val = regs[i++];

		printf(".... %-5s 0x%" PRIx64 "\n",
		       perf_reg_name(rid), val);
	}
}

static const char *regs_abi[] = {
	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
};

static inline const char *regs_dump_abi(struct regs_dump *d)
{
	if (d->abi > PERF_SAMPLE_REGS_ABI_64)
		return "unknown";

	return regs_abi[d->abi];
}

static void regs__printf(const char *type, struct regs_dump *regs)
{
	u64 mask = regs->mask;

	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
	       type,
	       mask,
	       regs_dump_abi(regs));

	regs_dump__printf(mask, regs->regs);
}

static void regs_user__printf(struct perf_sample *sample)
{
	struct regs_dump *user_regs = &sample->user_regs;

	if (user_regs->regs)
		regs__printf("user", user_regs);
}

static void regs_intr__printf(struct perf_sample *sample)
{
	struct regs_dump *intr_regs = &sample->intr_regs;

	if (intr_regs->regs)
		regs__printf("intr", intr_regs);
}

static void stack_user__printf(struct stack_dump *dump)
{
	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
	       dump->size, dump->offset);
}

static void perf_evlist__print_tstamp(struct evlist *evlist,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	u64 sample_type = __perf_evlist__combined_sample_type(evlist);

	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !perf_evlist__sample_id_all(evlist)) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void sample_read__printf(struct perf_sample *sample, u64 read_format)
{
	printf("... sample_read:\n");

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("...... time enabled %016" PRIx64 "\n",
		       sample->read.time_enabled);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("...... time running %016" PRIx64 "\n",
		       sample->read.time_running);

	if (read_format & PERF_FORMAT_GROUP) {
		u64 i;

		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);

		for (i = 0; i < sample->read.group.nr; i++) {
			struct sample_read_value *value;

			value = &sample->read.group.values[i];
			printf("..... id %016" PRIx64
			       ", value %016" PRIx64 "\n",
			       value->id, value->value);
		}
	} else
		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
			sample->read.one.id, sample->read.one.value);
}

static void dump_event(struct evlist *evlist, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);
	if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
		evlist->trace_event_sample_raw(evlist, event, sample);

	if (sample)
		perf_evlist__print_tstamp(evlist, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct evsel *evsel, union perf_event *event,
			struct perf_sample *sample)
{
	u64 sample_type;

	if (!dump_trace)
		return;

	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period, sample->addr);

	sample_type = evsel->core.attr.sample_type;

	if (evsel__has_callchain(evsel))
		callchain__printf(evsel, sample);

	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
		branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));

	if (sample_type & PERF_SAMPLE_REGS_USER)
		regs_user__printf(sample);

	if (sample_type & PERF_SAMPLE_REGS_INTR)
		regs_intr__printf(sample);

	if (sample_type & PERF_SAMPLE_STACK_USER)
		stack_user__printf(&sample->user_stack);

	if (sample_type & PERF_SAMPLE_WEIGHT)
		printf("... weight: %" PRIu64 "\n", sample->weight);

	if (sample_type & PERF_SAMPLE_DATA_SRC)
		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);

	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);

	if (sample_type & PERF_SAMPLE_TRANSACTION)
		printf("... transaction: %" PRIx64 "\n", sample->transaction);

	if (sample_type & PERF_SAMPLE_READ)
		sample_read__printf(sample, evsel->core.attr.read_format);
}

static void dump_read(struct evsel *evsel, union perf_event *event)
{
	struct perf_record_read *read_event = &event->read;
	u64 read_format;

	if (!dump_trace)
		return;

	printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
	       perf_evsel__name(evsel),
	       event->read.value);

	if (!evsel)
		return;

	read_format = evsel->core.attr.read_format;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("... time running : %" PRI_lu64 "\n", read_event->time_running);

	if (read_format & PERF_FORMAT_ID)
		printf("... id           : %" PRI_lu64 "\n", read_event->id);
}

static struct machine *machines__find_for_cpumode(struct machines *machines,
					       union perf_event *event,
					       struct perf_sample *sample)
{
	struct machine *machine;

	if (perf_guest &&
	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
		u32 pid;

		if (event->header.type == PERF_RECORD_MMAP
		    || event->header.type == PERF_RECORD_MMAP2)
			pid = event->mmap.pid;
		else
			pid = sample->pid;

		machine = machines__find(machines, pid);
		if (!machine)
			machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
		return machine;
	}

	return &machines->host;
}

static int deliver_sample_value(struct evlist *evlist,
				struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct sample_read_value *v,
				struct machine *machine)
{
	struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);

	if (sid) {
		sample->id     = v->id;
		sample->period = v->value - sid->period;
		sid->period    = v->value;
	}

	if (!sid || sid->evsel == NULL) {
		++evlist->stats.nr_unknown_id;
		return 0;
	}

	/*
	 * There's no reason to deliver sample
	 * for zero period, bail out.
	 */
	if (!sample->period)
		return 0;

	return tool->sample(tool, event, sample, sid->evsel, machine);
}

static int deliver_sample_group(struct evlist *evlist,
				struct perf_tool *tool,
				union  perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	int ret = -EINVAL;
	u64 i;

	for (i = 0; i < sample->read.group.nr; i++) {
		ret = deliver_sample_value(evlist, tool, event, sample,
					   &sample->read.group.values[i],
					   machine);
		if (ret)
			break;
	}

	return ret;
}

static int
perf_evlist__deliver_sample(struct evlist *evlist,
			    struct perf_tool *tool,
			    union  perf_event *event,
			    struct perf_sample *sample,
			    struct evsel *evsel,
			    struct machine *machine)
{
	/* We know evsel != NULL. */
	u64 sample_type = evsel->core.attr.sample_type;
	u64 read_format = evsel->core.attr.read_format;

	/* Standard sample delivery. */
	if (!(sample_type & PERF_SAMPLE_READ))
		return tool->sample(tool, event, sample, evsel, machine);

	/* For PERF_SAMPLE_READ we have either single or group mode. */
	if (read_format & PERF_FORMAT_GROUP)
		return deliver_sample_group(evlist, tool, event, sample,
					    machine);
	else
		return deliver_sample_value(evlist, tool, event, sample,
					    &sample->read.one, machine);
}

static int machines__deliver_event(struct machines *machines,
				   struct evlist *evlist,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool, u64 file_offset)
{
	struct evsel *evsel;
	struct machine *machine;

	dump_event(evlist, event, file_offset, sample);

	evsel = perf_evlist__id2evsel(evlist, sample->id);

	machine = machines__find_for_cpumode(machines, event, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		if (evsel == NULL) {
			++evlist->stats.nr_unknown_id;
			return 0;
		}
		dump_sample(evsel, event, sample);
		if (machine == NULL) {
			++evlist->stats.nr_unprocessable_samples;
			return 0;
		}
		return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
	case PERF_RECORD_MMAP:
		return tool->mmap(tool, event, sample, machine);
	case PERF_RECORD_MMAP2:
		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
			++evlist->stats.nr_proc_map_timeout;
		return tool->mmap2(tool, event, sample, machine);
	case PERF_RECORD_COMM:
		return tool->comm(tool, event, sample, machine);
	case PERF_RECORD_NAMESPACES:
		return tool->namespaces(tool, event, sample, machine);
	case PERF_RECORD_FORK:
		return tool->fork(tool, event, sample, machine);
	case PERF_RECORD_EXIT:
		return tool->exit(tool, event, sample, machine);
	case PERF_RECORD_LOST:
		if (tool->lost == perf_event__process_lost)
			evlist->stats.total_lost += event->lost.lost;
		return tool->lost(tool, event, sample, machine);
	case PERF_RECORD_LOST_SAMPLES:
		if (tool->lost_samples == perf_event__process_lost_samples)
			evlist->stats.total_lost_samples += event->lost_samples.lost;
		return tool->lost_samples(tool, event, sample, machine);
	case PERF_RECORD_READ:
		dump_read(evsel, event);
		return tool->read(tool, event, sample, evsel, machine);
	case PERF_RECORD_THROTTLE:
		return tool->throttle(tool, event, sample, machine);
	case PERF_RECORD_UNTHROTTLE:
		return tool->unthrottle(tool, event, sample, machine);
	case PERF_RECORD_AUX:
		if (tool->aux == perf_event__process_aux) {
			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
				evlist->stats.total_aux_lost += 1;
			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
				evlist->stats.total_aux_partial += 1;
		}
		return tool->aux(tool, event, sample, machine);
	case PERF_RECORD_ITRACE_START:
		return tool->itrace_start(tool, event, sample, machine);
	case PERF_RECORD_SWITCH:
	case PERF_RECORD_SWITCH_CPU_WIDE:
		return tool->context_switch(tool, event, sample, machine);
	case PERF_RECORD_KSYMBOL:
		return tool->ksymbol(tool, event, sample, machine);
	case PERF_RECORD_BPF_EVENT:
		return tool->bpf(tool, event, sample, machine);
	default:
		++evlist->stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	ret = perf_evlist__parse_sample(session->evlist, event, &sample);
	if (ret) {
		pr_err("Can't parse sample, err = %d\n", ret);
		return ret;
	}

	ret = auxtrace__process_event(session, event, &sample, tool);
	if (ret < 0)
		return ret;
	if (ret > 0)
		return 0;

	return machines__deliver_event(&session->machines, session->evlist,
				       event, &sample, tool, file_offset);
}

static s64 perf_session__process_user_event(struct perf_session *session,
					    union perf_event *event,
					    u64 file_offset)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct perf_sample sample = { .time = 0, };
	int fd = perf_data__fd(session->data);
	int err;

	if (event->header.type != PERF_RECORD_COMPRESSED ||
	    tool->compressed == perf_session__process_compressed_event_stub)
		dump_event(session->evlist, event, file_offset, &sample);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		err = tool->attr(tool, event, &session->evlist);
		if (err == 0) {
			perf_session__set_id_hdr_size(session);
			perf_session__set_comm_exec(session);
		}
		return err;
	case PERF_RECORD_EVENT_UPDATE:
		return tool->event_update(tool, event, &session->evlist);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		/*
		 * Deprecated, but we need to handle it for the sake
		 * of old data files created in pipe mode.
		 */
		return 0;
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(fd, file_offset, SEEK_SET);
		return tool->tracing_data(session, event);
	case PERF_RECORD_HEADER_BUILD_ID:
		return tool->build_id(session, event);
	case PERF_RECORD_FINISHED_ROUND:
		return tool->finished_round(tool, event, oe);
	case PERF_RECORD_ID_INDEX:
		return tool->id_index(session, event);
	case PERF_RECORD_AUXTRACE_INFO:
		return tool->auxtrace_info(session, event);
	case PERF_RECORD_AUXTRACE:
		/* setup for reading amidst mmap */
		lseek(fd, file_offset + event->header.size, SEEK_SET);
		return tool->auxtrace(session, event);
	case PERF_RECORD_AUXTRACE_ERROR:
		perf_session__auxtrace_error_inc(session, event);
		return tool->auxtrace_error(session, event);
	case PERF_RECORD_THREAD_MAP:
		return tool->thread_map(session, event);
	case PERF_RECORD_CPU_MAP:
		return tool->cpu_map(session, event);
	case PERF_RECORD_STAT_CONFIG:
		return tool->stat_config(session, event);
	case PERF_RECORD_STAT:
		return tool->stat(session, event);
	case PERF_RECORD_STAT_ROUND:
		return tool->stat_round(session, event);
	case PERF_RECORD_TIME_CONV:
		session->time_conv = event->time_conv;
		return tool->time_conv(session, event);
	case PERF_RECORD_HEADER_FEATURE:
		return tool->feature(session, event);
	case PERF_RECORD_COMPRESSED:
		err = tool->compressed(session, event, file_offset);
		if (err)
			dump_event(session->evlist, event, file_offset, &sample);
		return err;
	default:
		return -EINVAL;
	}
}

int perf_session__deliver_synth_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample)
{
	struct evlist *evlist = session->evlist;
	struct perf_tool *tool = session->tool;

	events_stats__inc(&evlist->stats, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, 0);

	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
}

static void event_swap(union perf_event *event, bool sample_id_all)
{
	perf_event__swap_op swap;

	swap = perf_event__swap_ops[event->header.type];
	if (swap)
		swap(event, sample_id_all);
}

int perf_session__peek_event(struct perf_session *session, off_t file_offset,
			     void *buf, size_t buf_sz,
			     union perf_event **event_ptr,
			     struct perf_sample *sample)
{
	union perf_event *event;
	size_t hdr_sz, rest;
	int fd;

	if (session->one_mmap && !session->header.needs_swap) {
		event = file_offset - session->one_mmap_offset +
			session->one_mmap_addr;
		goto out_parse_sample;
	}

	if (perf_data__is_pipe(session->data))
		return -1;

	fd = perf_data__fd(session->data);
	hdr_sz = sizeof(struct perf_event_header);

	if (buf_sz < hdr_sz)
		return -1;

	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
		return -1;

	event = (union perf_event *)buf;

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (event->header.size < hdr_sz || event->header.size > buf_sz)
		return -1;

	buf += hdr_sz;	/* the header was already read above, don't overwrite it */
	rest = event->header.size - hdr_sz;

	if (readn(fd, buf, rest) != (ssize_t)rest)
		return -1;

	if (session->header.needs_swap)
		event_swap(event, perf_evlist__sample_id_all(session->evlist));

out_parse_sample:

	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
	    perf_evlist__parse_sample(session->evlist, event, sample))
		return -1;

	*event_ptr = event;

	return 0;
}
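
/*
 * Illustrative note (not in the original): perf_session__peek_event()
 * reads an event at an arbitrary file offset without disturbing normal
 * processing, so a caller supplies scratch space large enough for any
 * event, roughly:
 *
 *	char buf[PERF_SAMPLE_MAX_SIZE];
 *	union perf_event *ev;
 *
 *	if (perf_session__peek_event(session, offset, buf, sizeof(buf), &ev, NULL))
 *		return -1;
 */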

static s64 perf_session__process_event(struct perf_session *session,
				       union perf_event *event, u64 file_offset)
{
	struct evlist *evlist = session->evlist;
	struct perf_tool *tool = session->tool;
	int ret;

	if (session->header.needs_swap)
		event_swap(event, perf_evlist__sample_id_all(evlist));

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	events_stats__inc(&evlist->stats, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, file_offset);

	if (tool->ordered_events) {
		u64 timestamp = -1ULL;

		ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
		if (ret && ret != -1)
			return ret;

		ret = perf_session__queue_event(session, event, timestamp, file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session__deliver_event(session, event, tool, file_offset);
}

void perf_event_header__bswap(struct perf_event_header *hdr)
{
	hdr->type = bswap_32(hdr->type);
	hdr->misc = bswap_16(hdr->misc);
	hdr->size = bswap_16(hdr->size);
}

struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
	return machine__findnew_thread(&session->machines.host, -1, pid);
}

1696 1697 1698 1699 1700 1701 1702
/*
 * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
 * So here a single thread is created for that, but actually there is a separate
 * idle task per cpu, so there should be one 'struct thread' per cpu, but there
 * is only 1. That causes problems for some tools, requiring workarounds. For
 * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
 */
int perf_session__register_idle_thread(struct perf_session *session)
{
	struct thread *thread;
	int err = 0;

	thread = machine__findnew_thread(&session->machines.host, 0, 0);
	if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
		pr_err("problem inserting idle task.\n");
		err = -1;
	}

	if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
		pr_err("problem inserting idle task.\n");
		err = -1;
	}

	/* machine__findnew_thread() got the thread, so put it */
	thread__put(thread);
	return err;
}

static void
perf_session__warn_order(const struct perf_session *session)
{
	const struct ordered_events *oe = &session->ordered_events;
	struct evsel *evsel;
	bool should_warn = true;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.write_backward)
			should_warn = false;
	}

	if (!should_warn)
		return;
	if (oe->nr_unordered_events != 0)
		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
}

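/*
 * Emit the end-of-processing warnings: lost events/samples, AUX data
 * losses and gaps, unknown or unprocessable records, invalid callchains
 * and timed-out /proc map readings.
 */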
static void perf_session__warn_about_errors(const struct perf_session *session)
{
	const struct events_stats *stats = &session->evlist->stats;

	if (session->tool->lost == perf_event__process_lost &&
	    stats->nr_events[PERF_RECORD_LOST] != 0) {
		ui__warning("Processed %d events and lost %d chunks!\n\n"
			    "Check IO/CPU overload!\n\n",
			    stats->nr_events[0],
			    stats->nr_events[PERF_RECORD_LOST]);
	}

	if (session->tool->lost_samples == perf_event__process_lost_samples) {
		double drop_rate;

		drop_rate = (double)stats->total_lost_samples /
			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
		if (drop_rate > 0.05) {
			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
				    drop_rate * 100.0);
		}
	}

	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_lost != 0) {
		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
			    stats->total_aux_lost,
			    stats->nr_events[PERF_RECORD_AUX]);
	}

	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_partial != 0) {
		bool vmm_exclusive = false;

		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
		                       &vmm_exclusive);

		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
		            "Are you running a KVM guest in the background?%s\n\n",
			    stats->total_aux_partial,
			    stats->nr_events[PERF_RECORD_AUX],
			    vmm_exclusive ?
			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
			    "will reduce the gaps to only guest's timeslices." :
			    "");
	}

	if (stats->nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    stats->nr_unknown_events);
	}

	if (stats->nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    stats->nr_unknown_id);
	}

	if (stats->nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    stats->nr_invalid_chains,
			    stats->nr_events[PERF_RECORD_SAMPLE]);
	}

	if (stats->nr_unprocessable_samples != 0) {
		ui__warning("%u unprocessable samples recorded.\n"
			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
			    stats->nr_unprocessable_samples);
	}

	perf_session__warn_order(session);

	events_stats__auxtrace_error_warn(stats);

	if (stats->nr_proc_map_timeout != 0) {
		ui__warning("%d map information files for pre-existing threads were\n"
			    "not processed, if there are samples for addresses they\n"
			    "will not be resolved, you may find out which are these\n"
			    "threads by running with -v and redirecting the output\n"
			    "to a file.\n"
			    "The time limit to process proc map is too short?\n"
			    "Increase it by --proc-map-timeout\n",
			    stats->nr_proc_map_timeout);
	}
}

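/*
 * Flush the per-thread call/return stacks so that any entries still
 * queued there are delivered before processing finishes.
 */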
static int perf_session__flush_thread_stack(struct thread *thread,
					    void *p __maybe_unused)
{
	return thread_stack__flush(thread);
}

static int perf_session__flush_thread_stacks(struct perf_session *session)
{
	return machines__for_each_thread(&session->machines,
					 perf_session__flush_thread_stack,
					 NULL);
}

volatile int session_done;

static int __perf_session__process_decomp_events(struct perf_session *session);

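/*
 * Pipe input cannot be mmapped and does not support seeking, so read the
 * stream one event at a time into a heap buffer, growing it whenever a
 * header declares a size larger than anything seen so far.
 */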
static int __perf_session__process_pipe_events(struct perf_session *session)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	int fd = perf_data__fd(session->data);
	union perf_event *event;
	uint32_t size, cur_size = 0;
	void *buf = NULL;
	s64 skip = 0;
	u64 head;
	ssize_t err;
	void *p;

	perf_tool__fill_defaults(tool);

	head = 0;
	cur_size = sizeof(union perf_event);

	buf = malloc(cur_size);
	if (!buf)
		return -errno;
	ordered_events__set_copy_on_queue(oe, true);
more:
	event = buf;
	err = readn(fd, event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	size = event->header.size;
	if (size < sizeof(struct perf_event_header)) {
		pr_err("bad event header size\n");
		goto out_err;
	}

	if (size > cur_size) {
		void *new = realloc(buf, size);
		if (!new) {
			pr_err("failed to allocate memory to read event\n");
			goto out_err;
		}
		buf = new;
		cur_size = size;
		event = buf;
	}
	p = event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if ((skip = perf_session__process_event(session, event, head)) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
		       head, event->header.size, event->header.type);
		err = -EINVAL;
		goto out_err;
	}

	head += size;

	if (skip > 0)
		head += skip;

	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out_err;

	if (!session_done())
		goto more;
done:
	/* do the final flush for ordered samples */
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
out_err:
	free(buf);
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
	ordered_events__free(&session->ordered_events);
	auxtrace__free_events(session);
	return err;
}

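/*
 * Return the event at 'head' within the current mmap window.  NULL means
 * the header would cross the end of the window and the caller should
 * remap; an ERR_PTR() means the header-declared size runs past the
 * window, which is treated as corrupt (fuzzed?) data.
 */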
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size) {
		/* We're not fetching the event so swap back again */
		if (session->header.needs_swap)
			perf_event_header__bswap(&event->header);
		pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx: fuzzed perf.data?\n",
			 __func__, head, event->header.size, mmap_size);
		return ERR_PTR(-EINVAL);
	}

	return event;
}

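/*
 * Drain the most recently decompressed PERF_RECORD_COMPRESSED buffer,
 * running every event it contains through the normal processing path.
 */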
static int __perf_session__process_decomp_events(struct perf_session *session)
{
	s64 skip;
	u64 size, file_pos = 0;
	struct decomp *decomp = session->decomp_last;

	if (!decomp)
		return 0;

	while (decomp->head < decomp->size && !session_done()) {
		union perf_event *event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);

		if (IS_ERR(event))
			return PTR_ERR(event);

		if (!event)
			break;

		size = event->header.size;

		if (size < sizeof(struct perf_event_header) ||
		    (skip = perf_session__process_event(session, event, file_pos)) < 0) {
			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
				decomp->file_pos + decomp->head, event->header.size, event->header.type);
			return -EINVAL;
		}

		if (skip)
			size += skip;

		decomp->head += size;
	}

	return 0;
}

/*
 * On 64bit we can mmap the data file in one go. No need for tiny mmap
 * slices. On 32bit we use 32MB.
 */
#if BITS_PER_LONG == 64
#define MMAP_SIZE ULLONG_MAX
#define NUM_MMAPS 1
#else
#define MMAP_SIZE (32 * 1024 * 1024ULL)
#define NUM_MMAPS 128
#endif

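/*
 * A reader bundles the perf.data file descriptor and the bounds of its
 * data section with the callback that processes each fetched event.
 */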
struct reader;

typedef s64 (*reader_cb_t)(struct perf_session *session,
			   union perf_event *event,
			   u64 file_offset);

struct reader {
	int		 fd;
	u64		 data_size;
	u64		 data_offset;
	reader_cb_t	 process;
};

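/*
 * Iterate the on-disk data section through a series of mmap windows (a
 * single window covering the whole file on 64-bit, see MMAP_SIZE),
 * remapping further into the file whenever the next event header falls
 * outside the current window.
 */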
static int
reader__process_events(struct reader *rd, struct perf_session *session,
		       struct ui_progress *prog)
{
	u64 data_size = rd->data_size;
	u64 head, page_offset, file_offset, file_pos, size;
	int err = 0, mmap_prot, mmap_flags, map_idx = 0;
	size_t	mmap_size;
	char *buf, *mmaps[NUM_MMAPS];
	union perf_event *event;
	s64 skip;

	page_offset = page_size * (rd->data_offset / page_size);
	file_offset = page_offset;
	head = rd->data_offset - page_offset;

	ui_progress__init_size(prog, data_size, "Processing events...");

	data_size += rd->data_offset;

	mmap_size = MMAP_SIZE;
	if (mmap_size > data_size) {
		mmap_size = data_size;
		session->one_mmap = true;
	}

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;
	if (session->one_mmap) {
		session->one_mmap_addr = buf;
		session->one_mmap_offset = file_offset;
	}

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (IS_ERR(event))
		return PTR_ERR(event);

	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	skip = -EINVAL;

	if (size < sizeof(struct perf_event_header) ||
	    (skip = rd->process(session, event, file_pos)) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
		       file_offset + head, event->header.size,
		       event->header.type, strerror(-skip));
		err = skip;
		goto out;
	}

	if (skip)
		size += skip;

	head += size;
	file_pos += size;

	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out;

	ui_progress__update(prog, size);

	if (session_done())
		goto out;

	if (file_pos < data_size)
		goto more;

out:
	return err;
}

static s64 process_simple(struct perf_session *session,
			  union perf_event *event,
			  u64 file_offset)
{
	return perf_session__process_event(session, event, file_offset);
}

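/*
 * Non-pipe processing: run the reader over the data section, then do the
 * final flush of ordered events, AUX area traces and thread stacks.
 */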
static int __perf_session__process_events(struct perf_session *session)
{
	struct reader rd = {
		.fd		= perf_data__fd(session->data),
		.data_size	= session->header.data_size,
		.data_offset	= session->header.data_offset,
		.process	= process_simple,
	};
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct ui_progress prog;
	int err;

	perf_tool__fill_defaults(tool);

	if (rd.data_size == 0)
		return -1;

	ui_progress__init_size(&prog, rd.data_size, "Processing events...");

	err = reader__process_events(&rd, session, &prog);
	if (err)
		goto out_err;
	/* do the final flush for ordered samples */
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
out_err:
	ui_progress__finish();
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
	/*
	 * We may be switching perf.data output; make ordered_events
	 * reusable.
	 */
	ordered_events__reinit(&session->ordered_events);
	auxtrace__free_events(session);
	session->one_mmap = false;
	return err;
}

int perf_session__process_events(struct perf_session *session)
{
	if (perf_session__register_idle_thread(session) < 0)
		return -ENOMEM;

	if (perf_data__is_pipe(session->data))
		return __perf_session__process_pipe_events(session);

	return __perf_session__process_events(session);
}

bool perf_session__has_traces(struct perf_session *session, const char *msg)
{
	struct evsel *evsel;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
			return true;
	}

	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
	return false;
}

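/*
 * Remember the kallsyms reference symbol and address used to relocate
 * kernel map symbols; anything from a ']' onwards is trimmed from the
 * stored symbol name.
 */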
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
	char *bracket;
	struct ref_reloc_sym *ref;
	struct kmap *kmap;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	kmap = map__kmap(map);
	if (kmap)
		kmap->ref_reloc_sym = ref;

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
{
	return machines__fprintf_dsos(&session->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
					  bool (skip)(struct dso *dso, int parm), int parm)
{
	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	size_t ret;
	const char *msg = "";

	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
		msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";

	ret = fprintf(fp, "\nAggregated stats:%s\n", msg);

	ret += events_stats__fprintf(&session->evlist->stats, fp);
	return ret;
}

size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
	/*
	 * FIXME: Here we have to actually print all the machines in this
	 * session, not just the host...
	 */
	return machine__fprintf(&session->machines.host, fp);
}

struct evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct evsel *pos;

	evlist__for_each_entry(session->evlist, pos) {
		if (pos->core.attr.type == type)
			return pos;
	}
	return NULL;
}

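/*
 * Parse a user-supplied CPU list into a bitmap, after checking that every
 * event type in the session actually recorded PERF_SAMPLE_CPU.
 */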
int perf_session__cpu_bitmap(struct perf_session *session,
			     const char *cpu_list, unsigned long *cpu_bitmap)
{
	int i, err = -1;
	struct perf_cpu_map *map;
	int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS);

	for (i = 0; i < PERF_TYPE_MAX; ++i) {
		struct evsel *evsel;

		evsel = perf_session__find_first_evtype(session, i);
		if (!evsel)
			continue;

		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
			pr_err("File does not contain CPU events. "
			       "Remove -C option to proceed.\n");
			return -1;
		}
	}

	map = perf_cpu_map__new(cpu_list);
	if (map == NULL) {
		pr_err("Invalid cpu_list\n");
		return -1;
	}

	for (i = 0; i < map->nr; i++) {
		int cpu = map->map[i];

		if (cpu >= nr_cpus) {
			pr_err("Requested CPU %d too large. "
			       "Consider raising MAX_NR_CPUS\n", cpu);
			goto out_delete_map;
		}

		set_bit(cpu, cpu_bitmap);
	}

	err = 0;

out_delete_map:
	perf_cpu_map__put(map);
	return err;
}

void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
				bool full)
{
	if (session == NULL || fp == NULL)
		return;

	fprintf(fp, "# ========\n");
	perf_header__fprintf_info(session, fp, full);
	fprintf(fp, "# ========\n#\n");
}

int __perf_session__set_tracepoints_handlers(struct perf_session *session,
					     const struct evsel_str_handler *assocs,
					     size_t nr_assocs)
{
	struct evsel *evsel;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		/*
		 * If a handler is being added for an event that is not
		 * in the session, just ignore it.
		 */
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
		if (evsel == NULL)
			continue;

		err = -EEXIST;
		if (evsel->handler != NULL)
			goto out;
		evsel->handler = assocs[i].handler;
	}

	err = 0;
out:
	return err;
}

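/*
 * Process a PERF_RECORD_ID_INDEX event: for every entry, find the
 * matching perf_sample_id and cache the entry's idx/cpu/tid there.
 */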
int perf_event__process_id_index(struct perf_session *session,
				 union perf_event *event)
{
	struct evlist *evlist = session->evlist;
	struct perf_record_id_index *ie = &event->id_index;
	size_t i, nr, max_nr;

	max_nr = (ie->header.size - sizeof(struct perf_record_id_index)) /
		 sizeof(struct id_index_entry);
	nr = ie->nr;
	if (nr > max_nr)
		return -EINVAL;

	if (dump_trace)
		fprintf(stdout, " nr: %zu\n", nr);

	for (i = 0; i < nr; i++) {
		struct id_index_entry *e = &ie->entries[i];
		struct perf_sample_id *sid;

		if (dump_trace) {
			fprintf(stdout,	" ... id: %"PRI_lu64, e->id);
			fprintf(stdout,	"  idx: %"PRI_lu64, e->idx);
			fprintf(stdout,	"  cpu: %"PRI_ld64, e->cpu);
			fprintf(stdout,	"  tid: %"PRI_ld64"\n", e->tid);
		}

		sid = perf_evlist__id2sid(evlist, e->id);
		if (!sid)
			return -ENOENT;
		sid->idx = e->idx;
		sid->cpu = e->cpu;
		sid->tid = e->tid;
	}
	return 0;
}