// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <perf/cpumap.h>

#include "map_symbol.h"
#include "branch.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
#include "map.h"
#include "symbol.h"
#include "session.h"
#include "tool.h"
#include "perf_regs.h"
#include "asm/bug.h"
#include "auxtrace.h"
#include "thread.h"
#include "thread-stack.h"
#include "sample-raw.h"
#include "stat.h"
#include "tsc.h"
#include "ui/progress.h"
#include "../perf.h"
#include "arch/common.h"
#include "units.h"
#include <internal/lib.h>

#ifdef HAVE_ZSTD_SUPPORT
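/*
 * Decompress a PERF_RECORD_COMPRESSED event into a new mmap'd buffer.
 * Any not-yet-consumed tail of the previous decompression buffer is
 * copied to the front of the new one, so records that straddle two
 * compressed chunks can be reassembled.
 */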
static int perf_session__process_compressed_event(struct perf_session *session,
						  union perf_event *event, u64 file_offset)
{
	void *src;
	size_t decomp_size, src_size;
	u64 decomp_last_rem = 0;
	size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
	struct decomp *decomp, *decomp_last = session->decomp_last;

	if (decomp_last) {
		decomp_last_rem = decomp_last->size - decomp_last->head;
		decomp_len += decomp_last_rem;
	}

	mmap_len = sizeof(struct decomp) + decomp_len;
	decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	if (decomp == MAP_FAILED) {
		pr_err("Couldn't allocate memory for decompression\n");
		return -1;
	}

	decomp->file_pos = file_offset;
	decomp->mmap_len = mmap_len;
	decomp->head = 0;

	if (decomp_last_rem) {
		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
		decomp->size = decomp_last_rem;
	}

	src = (void *)event + sizeof(struct perf_record_compressed);
	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);

	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
	if (!decomp_size) {
		munmap(decomp, mmap_len);
		pr_err("Couldn't decompress data\n");
		return -1;
	}

	decomp->size += decomp_size;

	if (session->decomp == NULL) {
		session->decomp = decomp;
		session->decomp_last = decomp;
	} else {
		session->decomp_last->next = decomp;
		session->decomp_last = decomp;
	}

	pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);

	return 0;
}
#else /* !HAVE_ZSTD_SUPPORT */
#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
#endif

static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset);

static int perf_session__open(struct perf_session *session)
{
	struct perf_data *data = session->data;

	if (perf_session__read_header(session) < 0) {
		pr_err("incompatible file format (rerun with -v to learn more)\n");
		return -1;
	}

	if (perf_data__is_pipe(data))
		return 0;

	if (perf_header__has_feat(&session->header, HEADER_STAT))
		return 0;

	if (!evlist__valid_sample_type(session->evlist)) {
		pr_err("non matching sample_type\n");
		return -1;
	}

	if (!evlist__valid_sample_id_all(session->evlist)) {
		pr_err("non matching sample_id_all\n");
		return -1;
	}

	if (!evlist__valid_read_format(session->evlist)) {
		pr_err("non matching read_format\n");
		return -1;
	}

	return 0;
}

void perf_session__set_id_hdr_size(struct perf_session *session)
{
	u16 id_hdr_size = evlist__id_hdr_size(session->evlist);

	machines__set_id_hdr_size(&session->machines, id_hdr_size);
}

int perf_session__create_kernel_maps(struct perf_session *session)
{
	int ret = machine__create_kernel_maps(&session->machines.host);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&session->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *session)
{
	machines__destroy_kernel_maps(&session->machines);
}

static bool perf_session__has_comm_exec(struct perf_session *session)
{
	struct evsel *evsel;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.comm_exec)
			return true;
	}

	return false;
}

static void perf_session__set_comm_exec(struct perf_session *session)
{
	bool comm_exec = perf_session__has_comm_exec(session);

	machines__set_comm_exec(&session->machines, comm_exec);
}

static int ordered_events__deliver_event(struct ordered_events *oe,
					 struct ordered_event *event)
{
	struct perf_session *session = container_of(oe, struct perf_session,
						    ordered_events);

	return perf_session__deliver_event(session, event->event,
					   session->tool, event->file_offset);
}

struct perf_session *perf_session__new(struct perf_data *data,
				       bool repipe, struct perf_tool *tool)
{
	int ret = -ENOMEM;
	struct perf_session *session = zalloc(sizeof(*session));

	if (!session)
		goto out;

	session->repipe = repipe;
	session->tool   = tool;
	INIT_LIST_HEAD(&session->auxtrace_index);
	machines__init(&session->machines);
	ordered_events__init(&session->ordered_events,
			     ordered_events__deliver_event, NULL);

	perf_env__init(&session->header.env);
	if (data) {
		ret = perf_data__open(data);
		if (ret < 0)
			goto out_delete;

		session->data = data;

		if (perf_data__is_read(data)) {
			ret = perf_session__open(session);
			if (ret < 0)
				goto out_delete;

			/*
			 * set session attributes that are present in perf.data
			 * but not in pipe-mode.
			 */
			if (!data->is_pipe) {
				perf_session__set_id_hdr_size(session);
				perf_session__set_comm_exec(session);
			}

			evlist__init_trace_event_sample_raw(session->evlist);

			/* Open the directory data. */
			if (data->is_dir) {
				ret = perf_data__open_dir(data);
				if (ret)
					goto out_delete;
			}

			if (!symbol_conf.kallsyms_name &&
			    !symbol_conf.vmlinux_name)
				symbol_conf.kallsyms_name = perf_data__kallsyms_name(data);
		}
	} else  {
		session->machines.host.env = &perf_env;
	}

	session->machines.host.single_address_space =
		perf_env__single_address_space(session->machines.host.env);

	if (!data || perf_data__is_write(data)) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(session) < 0)
			pr_warning("Cannot read kernel map\n");
	}

	/*
	 * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
	 * processed, so evlist__sample_id_all is not meaningful here.
	 */
	if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
	    tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		tool->ordered_events = false;
	}

	return session;

 out_delete:
	perf_session__delete(session);
 out:
	return ERR_PTR(ret);
}
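
/*
 * Usage sketch (illustrative only, not part of this file): a minimal
 * read-mode caller. The tool callbacks and most error handling are elided.
 *
 *	struct perf_data data = {
 *		.path = "perf.data",
 *		.mode = PERF_DATA_MODE_READ,
 *	};
 *	struct perf_session *session = perf_session__new(&data, false, &tool);
 *
 *	if (IS_ERR(session))
 *		return PTR_ERR(session);
 *	...
 *	perf_session__delete(session);
 */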

static void perf_session__delete_threads(struct perf_session *session)
{
	machine__delete_threads(&session->machines.host);
}

static void perf_session__release_decomp_events(struct perf_session *session)
{
	struct decomp *next, *decomp;
	size_t mmap_len;
	next = session->decomp;
	do {
		decomp = next;
		if (decomp == NULL)
			break;
		next = decomp->next;
		mmap_len = decomp->mmap_len;
		munmap(decomp, mmap_len);
	} while (1);
}

void perf_session__delete(struct perf_session *session)
{
	if (session == NULL)
		return;
	auxtrace__free(session);
	auxtrace_index__free(&session->auxtrace_index);
	perf_session__destroy_kernel_maps(session);
	perf_session__delete_threads(session);
	perf_session__release_decomp_events(session);
	perf_env__exit(&session->header.env);
	machines__exit(&session->machines);
	if (session->data) {
		if (perf_data__is_read(session->data))
			evlist__delete(session->evlist);
		perf_data__close(session->data);
	}
	free(session);
}

static int process_event_synth_tracing_data_stub(struct perf_session *session
						 __maybe_unused,
						 union perf_event *event
						 __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
					 union perf_event *event __maybe_unused,
					 struct evlist **pevlist
					 __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
						 union perf_event *event __maybe_unused,
						 struct evlist **pevlist
						 __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_event_update(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample __maybe_unused,
				     struct evsel *evsel __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(struct perf_tool *tool __maybe_unused,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
				       union perf_event *event __maybe_unused,
				       struct ordered_events *oe __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event,
				  struct ordered_events *oe);

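/* Read and discard n bytes from fd; used to skip over records on a pipe. */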
static int skipn(int fd, off_t n)
{
	char buf[4096];
	ssize_t ret;

	while (n > 0) {
		ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
		if (ret <= 0)
			return ret;
		n -= ret;
	}

	return 0;
}

static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused,
				       union perf_event *event)
{
	dump_printf(": unhandled!\n");
	if (perf_data__is_pipe(session->data))
		skipn(perf_data__fd(session->data), event->auxtrace.size);
	return event->auxtrace.size;
}

static int process_event_op2_stub(struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_thread_map_stub(struct perf_session *session __maybe_unused,
				  union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_thread_map(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_cpu_map_stub(struct perf_session *session __maybe_unused,
			       union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_cpu_map(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static
int process_event_stat_config_stub(struct perf_session *session __maybe_unused,
				   union perf_event *event __maybe_unused)
{
	if (dump_trace)
		perf_event__fprintf_stat_config(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_stat_stub(struct perf_session *perf_session __maybe_unused,
			     union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused,
				   union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_stat_round(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused,
					union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_time_conv(event, stdout);

	dump_printf(": unhandled!\n");
	return 0;
}

static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
						       union perf_event *event __maybe_unused,
						       u64 file_offset __maybe_unused)
{
	dump_printf(": unhandled!\n");
	return 0;
}

void perf_tool__fill_defaults(struct perf_tool *tool)
{
	if (tool->sample == NULL)
		tool->sample = process_event_sample_stub;
	if (tool->mmap == NULL)
		tool->mmap = process_event_stub;
	if (tool->mmap2 == NULL)
		tool->mmap2 = process_event_stub;
	if (tool->comm == NULL)
		tool->comm = process_event_stub;
	if (tool->namespaces == NULL)
		tool->namespaces = process_event_stub;
	if (tool->cgroup == NULL)
		tool->cgroup = process_event_stub;
	if (tool->fork == NULL)
		tool->fork = process_event_stub;
	if (tool->exit == NULL)
		tool->exit = process_event_stub;
	if (tool->lost == NULL)
		tool->lost = perf_event__process_lost;
	if (tool->lost_samples == NULL)
		tool->lost_samples = perf_event__process_lost_samples;
	if (tool->aux == NULL)
		tool->aux = perf_event__process_aux;
	if (tool->itrace_start == NULL)
		tool->itrace_start = perf_event__process_itrace_start;
	if (tool->context_switch == NULL)
		tool->context_switch = perf_event__process_switch;
	if (tool->ksymbol == NULL)
		tool->ksymbol = perf_event__process_ksymbol;
	if (tool->bpf == NULL)
		tool->bpf = perf_event__process_bpf;
	if (tool->text_poke == NULL)
		tool->text_poke = perf_event__process_text_poke;
	if (tool->read == NULL)
		tool->read = process_event_sample_stub;
	if (tool->throttle == NULL)
		tool->throttle = process_event_stub;
	if (tool->unthrottle == NULL)
		tool->unthrottle = process_event_stub;
	if (tool->attr == NULL)
		tool->attr = process_event_synth_attr_stub;
	if (tool->event_update == NULL)
		tool->event_update = process_event_synth_event_update_stub;
	if (tool->tracing_data == NULL)
		tool->tracing_data = process_event_synth_tracing_data_stub;
	if (tool->build_id == NULL)
		tool->build_id = process_event_op2_stub;
	if (tool->finished_round == NULL) {
		if (tool->ordered_events)
			tool->finished_round = process_finished_round;
		else
			tool->finished_round = process_finished_round_stub;
	}
	if (tool->id_index == NULL)
		tool->id_index = process_event_op2_stub;
	if (tool->auxtrace_info == NULL)
		tool->auxtrace_info = process_event_op2_stub;
	if (tool->auxtrace == NULL)
		tool->auxtrace = process_event_auxtrace_stub;
	if (tool->auxtrace_error == NULL)
		tool->auxtrace_error = process_event_op2_stub;
	if (tool->thread_map == NULL)
		tool->thread_map = process_event_thread_map_stub;
	if (tool->cpu_map == NULL)
		tool->cpu_map = process_event_cpu_map_stub;
	if (tool->stat_config == NULL)
		tool->stat_config = process_event_stat_config_stub;
	if (tool->stat == NULL)
		tool->stat = process_stat_stub;
	if (tool->stat_round == NULL)
		tool->stat_round = process_stat_round_stub;
	if (tool->time_conv == NULL)
		tool->time_conv = process_event_time_conv_stub;
	if (tool->feature == NULL)
		tool->feature = process_event_op2_stub;
	if (tool->compressed == NULL)
		tool->compressed = perf_session__process_compressed_event;
}

static void swap_sample_id_all(union perf_event *event, void *data)
{
	void *end = (void *) event + event->header.size;
	int size = end - data;

	BUG_ON(size % sizeof(u64));
	mem_bswap_64(data, size);
}

static void perf_event__all64_swap(union perf_event *event,
				   bool sample_id_all __maybe_unused)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);

	if (sample_id_all) {
		void *data = &event->comm.comm;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__mmap_swap(union perf_event *event,
				  bool sample_id_all)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);

	if (sample_id_all) {
		void *data = &event->mmap.filename;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__mmap2_swap(union perf_event *event,
				  bool sample_id_all)
{
	event->mmap2.pid   = bswap_32(event->mmap2.pid);
	event->mmap2.tid   = bswap_32(event->mmap2.tid);
	event->mmap2.start = bswap_64(event->mmap2.start);
	event->mmap2.len   = bswap_64(event->mmap2.len);
	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);

	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID)) {
		event->mmap2.maj   = bswap_32(event->mmap2.maj);
		event->mmap2.min   = bswap_32(event->mmap2.min);
		event->mmap2.ino   = bswap_64(event->mmap2.ino);
		event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
	}

	if (sample_id_all) {
		void *data = &event->mmap2.filename;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}
static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);

	if (sample_id_all)
		swap_sample_id_all(event, &event->fork + 1);
}

static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);

	if (sample_id_all)
		swap_sample_id_all(event, &event->read + 1);
}

static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
{
	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
	event->aux.aux_size   = bswap_64(event->aux.aux_size);
	event->aux.flags      = bswap_64(event->aux.flags);

	if (sample_id_all)
		swap_sample_id_all(event, &event->aux + 1);
}

static void perf_event__itrace_start_swap(union perf_event *event,
					  bool sample_id_all)
{
	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);

	if (sample_id_all)
		swap_sample_id_all(event, &event->itrace_start + 1);
}

static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
{
	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
		event->context_switch.next_prev_pid =
				bswap_32(event->context_switch.next_prev_pid);
		event->context_switch.next_prev_tid =
				bswap_32(event->context_switch.next_prev_tid);
	}

	if (sample_id_all)
		swap_sample_id_all(event, &event->context_switch + 1);
}

static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
{
	event->text_poke.addr    = bswap_64(event->text_poke.addr);
	event->text_poke.old_len = bswap_16(event->text_poke.old_len);
	event->text_poke.new_len = bswap_16(event->text_poke.new_len);

	if (sample_id_all) {
		size_t len = sizeof(event->text_poke.old_len) +
			     sizeof(event->text_poke.new_len) +
			     event->text_poke.old_len +
			     event->text_poke.new_len;
		void *data = &event->text_poke.old_len;

		data += PERF_ALIGN(len, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__throttle_swap(union perf_event *event,
				      bool sample_id_all)
{
	event->throttle.time	  = bswap_64(event->throttle.time);
	event->throttle.id	  = bswap_64(event->throttle.id);
	event->throttle.stream_id = bswap_64(event->throttle.stream_id);

	if (sample_id_all)
		swap_sample_id_all(event, &event->throttle + 1);
}

static void perf_event__namespaces_swap(union perf_event *event,
					bool sample_id_all)
{
	u64 i;

	event->namespaces.pid		= bswap_32(event->namespaces.pid);
	event->namespaces.tid		= bswap_32(event->namespaces.tid);
	event->namespaces.nr_namespaces	= bswap_64(event->namespaces.nr_namespaces);

	for (i = 0; i < event->namespaces.nr_namespaces; i++) {
		struct perf_ns_link_info *ns = &event->namespaces.link_info[i];

		ns->dev = bswap_64(ns->dev);
		ns->ino = bswap_64(ns->ino);
	}

	if (sample_id_all)
		swap_sample_id_all(event, &event->namespaces.link_info[i]);
}

static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
{
	event->cgroup.id = bswap_64(event->cgroup.id);

	if (sample_id_all) {
		void *data = &event->cgroup.path;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static u8 revbyte(u8 b)
{
	int rev = (b >> 4) | ((b & 0xf) << 4);
	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
	return (u8) rev;
}

/*
 * XXX this is a hack in an attempt to carry the flags bitfield
 * through the endian village. The ABI says:
 *
 * Bit-fields are allocated from right to left (least to most significant)
 * on little-endian implementations and from left to right (most to least
 * significant) on big-endian implementations.
 *
 * The above seems to be byte specific, so we need to reverse each
 * byte of the bitfield. 'Internet' also says this might be implementation
 * specific and we probably need a proper fix: carry the perf_event_attr
 * bitfield flags in a separate data file FEAT_ section. Though this seems
 * to work for now.
 */
static void swap_bitfield(u8 *p, unsigned len)
{
	unsigned i;

	for (i = 0; i < len; i++) {
		*p = revbyte(*p);
		p++;
	}
}
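
/*
 * Illustration: revbyte() mirrors the bits within each byte, e.g.
 * revbyte(0x01) == 0x80 and revbyte(0xb0) == 0x0d, so a bitfield
 * allocated right-to-left reads back correctly after the swap.
 */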

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);

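/*
 * Only swap a field if the on-file attr is big enough to contain it;
 * older perf.data files may carry a shorter perf_event_attr.
 */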
#define bswap_safe(f, n) 					\
	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
		       sizeof(attr->f) * (n)))
#define bswap_field(f, sz) 			\
do { 						\
	if (bswap_safe(f, 0))			\
		attr->f = bswap_##sz(attr->f);	\
} while(0)
#define bswap_field_16(f) bswap_field(f, 16)
#define bswap_field_32(f) bswap_field(f, 32)
#define bswap_field_64(f) bswap_field(f, 64)

	bswap_field_64(config);
	bswap_field_64(sample_period);
	bswap_field_64(sample_type);
	bswap_field_64(read_format);
	bswap_field_32(wakeup_events);
	bswap_field_32(bp_type);
	bswap_field_64(bp_addr);
	bswap_field_64(bp_len);
	bswap_field_64(branch_sample_type);
	bswap_field_64(sample_regs_user);
	bswap_field_32(sample_stack_user);
	bswap_field_32(aux_watermark);
	bswap_field_16(sample_max_stack);
	bswap_field_32(aux_sample_size);

	/*
	 * After read_format are bitfields. Check read_format because
	 * we are unable to use offsetof on a bitfield.
	 */
	if (bswap_safe(read_format, 1))
		swap_bitfield((u8 *) (&attr->read_format + 1),
			      sizeof(u64));
#undef bswap_field_64
#undef bswap_field_32
#undef bswap_field
#undef bswap_safe
}

static void perf_event__hdr_attr_swap(union perf_event *event,
				      bool sample_id_all __maybe_unused)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_update_swap(union perf_event *event,
					  bool sample_id_all __maybe_unused)
{
	event->event_update.type = bswap_64(event->event_update.type);
	event->event_update.id   = bswap_64(event->event_update.id);
}

static void perf_event__event_type_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event,
					  bool sample_id_all __maybe_unused)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

static void perf_event__auxtrace_info_swap(union perf_event *event,
					   bool sample_id_all __maybe_unused)
{
	size_t size;

	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);

	size = event->header.size;
	size -= (void *)&event->auxtrace_info.priv - (void *)event;
	mem_bswap_64(event->auxtrace_info.priv, size);
}

static void perf_event__auxtrace_swap(union perf_event *event,
				      bool sample_id_all __maybe_unused)
{
	event->auxtrace.size      = bswap_64(event->auxtrace.size);
	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
}

static void perf_event__auxtrace_error_swap(union perf_event *event,
					    bool sample_id_all __maybe_unused)
{
	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
	event->auxtrace_error.fmt  = bswap_32(event->auxtrace_error.fmt);
	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
	if (event->auxtrace_error.fmt)
		event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
}

static void perf_event__thread_map_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	unsigned i;

	event->thread_map.nr = bswap_64(event->thread_map.nr);

	for (i = 0; i < event->thread_map.nr; i++)
		event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
}

static void perf_event__cpu_map_swap(union perf_event *event,
				     bool sample_id_all __maybe_unused)
{
	struct perf_record_cpu_map_data *data = &event->cpu_map.data;
	struct cpu_map_entries *cpus;
	struct perf_record_record_cpu_map *mask;
	unsigned i;

	data->type = bswap_16(data->type);

	switch (data->type) {
	case PERF_CPU_MAP__CPUS:
		cpus = (struct cpu_map_entries *)data->data;

		cpus->nr = bswap_16(cpus->nr);

		for (i = 0; i < cpus->nr; i++)
			cpus->cpu[i] = bswap_16(cpus->cpu[i]);
		break;
	case PERF_CPU_MAP__MASK:
		mask = (struct perf_record_record_cpu_map *)data->data;

		mask->nr = bswap_16(mask->nr);
		mask->long_size = bswap_16(mask->long_size);

		switch (mask->long_size) {
		case 4: mem_bswap_32(&mask->mask, mask->nr); break;
		case 8: mem_bswap_64(&mask->mask, mask->nr); break;
		default:
			pr_err("cpu_map swap: unsupported long size\n");
		}
		break;
	default:
		break;
	}
}

static void perf_event__stat_config_swap(union perf_event *event,
					 bool sample_id_all __maybe_unused)
{
	u64 size;

	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
	size += 1; /* nr item itself */
	mem_bswap_64(&event->stat_config.nr, size);
}

static void perf_event__stat_swap(union perf_event *event,
				  bool sample_id_all __maybe_unused)
{
	event->stat.id     = bswap_64(event->stat.id);
	event->stat.thread = bswap_32(event->stat.thread);
	event->stat.cpu    = bswap_32(event->stat.cpu);
	event->stat.val    = bswap_64(event->stat.val);
	event->stat.ena    = bswap_64(event->stat.ena);
	event->stat.run    = bswap_64(event->stat.run);
}

static void perf_event__stat_round_swap(union perf_event *event,
					bool sample_id_all __maybe_unused)
{
	event->stat_round.type = bswap_64(event->stat_round.type);
	event->stat_round.time = bswap_64(event->stat_round.time);
}

static void perf_event__time_conv_swap(union perf_event *event,
				       bool sample_id_all __maybe_unused)
{
	event->time_conv.time_shift = bswap_64(event->time_conv.time_shift);
	event->time_conv.time_mult  = bswap_64(event->time_conv.time_mult);
	event->time_conv.time_zero  = bswap_64(event->time_conv.time_zero);

	if (event_contains(event->time_conv, time_cycles)) {
		event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles);
		event->time_conv.time_mask = bswap_64(event->time_conv.time_mask);
	}
}

typedef void (*perf_event__swap_op)(union perf_event *event,
				    bool sample_id_all);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
	[PERF_RECORD_CGROUP]		  = perf_event__cgroup_swap,
	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
	[PERF_RECORD_THREAD_MAP]	  = perf_event__thread_map_swap,
	[PERF_RECORD_CPU_MAP]		  = perf_event__cpu_map_swap,
	[PERF_RECORD_STAT_CONFIG]	  = perf_event__stat_config_swap,
	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
	[PERF_RECORD_TIME_CONV]		  = perf_event__time_conv_swap,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

/*
 * When perf record finishes a pass over every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush all
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush all events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush all events below timestamp 7
 *      etc...
 */
static int process_finished_round(struct perf_tool *tool __maybe_unused,
				  union perf_event *event __maybe_unused,
				  struct ordered_events *oe)
{
	if (dump_trace)
		fprintf(stdout, "\n");
	return ordered_events__flush(oe, OE_FLUSH__ROUND);
}

int perf_session__queue_event(struct perf_session *s, union perf_event *event,
			      u64 timestamp, u64 file_offset)
{
	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
}

static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
	struct ip_callchain *callchain = sample->callchain;
	struct branch_stack *lbr_stack = sample->branch_stack;
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	u64 kernel_callchain_nr = callchain->nr;
	unsigned int i;

	for (i = 0; i < kernel_callchain_nr; i++) {
		if (callchain->ips[i] == PERF_CONTEXT_USER)
			break;
	}

	if ((i != kernel_callchain_nr) && lbr_stack->nr) {
		u64 total_nr;
		/*
		 * The LBR callstack can only capture the user call chain;
		 * i is the number of kernel call chain entries, and the
		 * extra 1 is the PERF_CONTEXT_USER marker.
		 *
		 * The user call chain is stored in LBR registers.
		 * LBRs are register pairs: the caller is stored
		 * in the "from" register, while the callee is stored
		 * in the "to" register.
		 * For example, for a call stack
		 * "A"->"B"->"C"->"D",
		 * the LBR registers will be recorded as
		 * "C"->"D", "B"->"C", "A"->"B".
		 * So only the first "to" register and all "from"
		 * registers are needed to construct the whole stack.
		 */
		total_nr = i + 1 + lbr_stack->nr + 1;
		kernel_callchain_nr = i + 1;

		printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);

		for (i = 0; i < kernel_callchain_nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       i, callchain->ips[i]);

		printf("..... %2d: %016" PRIx64 "\n",
		       (int)(kernel_callchain_nr), entries[0].to);
		for (i = 0; i < lbr_stack->nr; i++)
			printf("..... %2d: %016" PRIx64 "\n",
			       (int)(i + kernel_callchain_nr + 1), entries[i].from);
	}
}

static void callchain__printf(struct evsel *evsel,
			      struct perf_sample *sample)
{
	unsigned int i;
	struct ip_callchain *callchain = sample->callchain;

	if (evsel__has_branch_callstack(evsel))
		callchain__lbr_callstack_printf(sample);

	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);

	for (i = 0; i < callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, callchain->ips[i]);
}

static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	uint64_t i;

	printf("%s: nr:%" PRIu64 "\n",
		!callstack ? "... branch stack" : "... branch callstack",
		sample->branch_stack->nr);

	for (i = 0; i < sample->branch_stack->nr; i++) {
		struct branch_entry *e = &entries[i];

		if (!callstack) {
			printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
				i, e->from, e->to,
				(unsigned short)e->flags.cycles,
				e->flags.mispred ? "M" : " ",
				e->flags.predicted ? "P" : " ",
				e->flags.abort ? "A" : " ",
				e->flags.in_tx ? "T" : " ",
				(unsigned)e->flags.reserved);
		} else {
			printf("..... %2"PRIu64": %016" PRIx64 "\n",
				i, i > 0 ? e->from : e->to);
		}
	}
}

static void regs_dump__printf(u64 mask, u64 *regs)
{
	unsigned rid, i = 0;

	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
		u64 val = regs[i++];

		printf(".... %-5s 0x%016" PRIx64 "\n",
		       perf_reg_name(rid), val);
	}
}

static const char *regs_abi[] = {
	[PERF_SAMPLE_REGS_ABI_NONE] = "none",
	[PERF_SAMPLE_REGS_ABI_32] = "32-bit",
	[PERF_SAMPLE_REGS_ABI_64] = "64-bit",
};

static inline const char *regs_dump_abi(struct regs_dump *d)
{
	if (d->abi > PERF_SAMPLE_REGS_ABI_64)
		return "unknown";

	return regs_abi[d->abi];
}

static void regs__printf(const char *type, struct regs_dump *regs)
{
	u64 mask = regs->mask;

	printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
	       type,
	       mask,
	       regs_dump_abi(regs));

	regs_dump__printf(mask, regs->regs);
}

static void regs_user__printf(struct perf_sample *sample)
{
	struct regs_dump *user_regs = &sample->user_regs;

	if (user_regs->regs)
		regs__printf("user", user_regs);
}

static void regs_intr__printf(struct perf_sample *sample)
{
	struct regs_dump *intr_regs = &sample->intr_regs;

	if (intr_regs->regs)
		regs__printf("intr", intr_regs);
}

static void stack_user__printf(struct stack_dump *dump)
{
	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
	       dump->size, dump->offset);
}

static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
{
	u64 sample_type = __evlist__combined_sample_type(evlist);

	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !evlist__sample_id_all(evlist)) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void sample_read__printf(struct perf_sample *sample, u64 read_format)
{
	printf("... sample_read:\n");

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("...... time enabled %016" PRIx64 "\n",
		       sample->read.time_enabled);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("...... time running %016" PRIx64 "\n",
		       sample->read.time_running);

	if (read_format & PERF_FORMAT_GROUP) {
		u64 i;

		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);

		for (i = 0; i < sample->read.group.nr; i++) {
			struct sample_read_value *value;

			value = &sample->read.group.values[i];
			printf("..... id %016" PRIx64
			       ", value %016" PRIx64 "\n",
			       value->id, value->value);
		}
	} else
		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
			sample->read.one.id, sample->read.one.value);
}

static void dump_event(struct evlist *evlist, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);
	if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
		evlist->trace_event_sample_raw(evlist, event, sample);

	if (sample)
		evlist__print_tstamp(evlist, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

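/*
 * Format a page size as a human-readable string, e.g. (illustrative)
 * 4096 -> "4K"; a size of 0 or a formatting failure yields "N/A".
 */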
char *get_page_size_name(u64 size, char *str)
{
	if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
		snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");

	return str;
}

static void dump_sample(struct evsel *evsel, union perf_event *event,
			struct perf_sample *sample)
{
	u64 sample_type;
	char str[PAGE_SIZE_NAME_LEN];

	if (!dump_trace)
		return;

	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period, sample->addr);

	sample_type = evsel->core.attr.sample_type;

	if (evsel__has_callchain(evsel))
		callchain__printf(evsel, sample);

	if (evsel__has_br_stack(evsel))
		branch_stack__printf(sample, evsel__has_branch_callstack(evsel));

	if (sample_type & PERF_SAMPLE_REGS_USER)
		regs_user__printf(sample);

	if (sample_type & PERF_SAMPLE_REGS_INTR)
		regs_intr__printf(sample);

	if (sample_type & PERF_SAMPLE_STACK_USER)
		stack_user__printf(&sample->user_stack);

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
		printf("... weight: %" PRIu64 "", sample->weight);
		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
			printf(",0x%"PRIx16"", sample->ins_lat);
			printf(",0x%"PRIx16"", sample->p_stage_cyc);
		}
		printf("\n");
	}

	if (sample_type & PERF_SAMPLE_DATA_SRC)
		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);

	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);

	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
		printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));

	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
		printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str));

	if (sample_type & PERF_SAMPLE_TRANSACTION)
		printf("... transaction: %" PRIx64 "\n", sample->transaction);

	if (sample_type & PERF_SAMPLE_READ)
		sample_read__printf(sample, evsel->core.attr.read_format);
}

static void dump_read(struct evsel *evsel, union perf_event *event)
{
	struct perf_record_read *read_event = &event->read;
	u64 read_format;

	if (!dump_trace)
		return;

	printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
	       evsel__name(evsel), event->read.value);

	if (!evsel)
		return;

	read_format = evsel->core.attr.read_format;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("... time running : %" PRI_lu64 "\n", read_event->time_running);

	if (read_format & PERF_FORMAT_ID)
		printf("... id           : %" PRI_lu64 "\n", read_event->id);
}

static struct machine *machines__find_for_cpumode(struct machines *machines,
					       union perf_event *event,
					       struct perf_sample *sample)
{
	if (perf_guest &&
	    ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
	     (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
		u32 pid;

		if (event->header.type == PERF_RECORD_MMAP
		    || event->header.type == PERF_RECORD_MMAP2)
			pid = event->mmap.pid;
		else
			pid = sample->pid;

		return machines__find_guest(machines, pid);
	}

	return &machines->host;
}

static int deliver_sample_value(struct evlist *evlist,
				struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct sample_read_value *v,
				struct machine *machine)
{
	struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
	struct evsel *evsel;

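	/*
	 * PERF_SAMPLE_READ counter values are running totals; turn the
	 * total into this sample's period by subtracting the previously
	 * seen total cached in the sample id.
	 */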
	if (sid) {
		sample->id     = v->id;
		sample->period = v->value - sid->period;
		sid->period    = v->value;
	}

	if (!sid || sid->evsel == NULL) {
		++evlist->stats.nr_unknown_id;
		return 0;
	}

	/*
	 * There's no reason to deliver a sample
	 * for a zero period, bail out.
	 */
	if (!sample->period)
		return 0;

	evsel = container_of(sid->evsel, struct evsel, core);
	return tool->sample(tool, event, sample, evsel, machine);
}

static int deliver_sample_group(struct evlist *evlist,
				struct perf_tool *tool,
				union  perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	int ret = -EINVAL;
	u64 i;

	for (i = 0; i < sample->read.group.nr; i++) {
		ret = deliver_sample_value(evlist, tool, event, sample,
					   &sample->read.group.values[i],
					   machine);
		if (ret)
			break;
	}

	return ret;
}

static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
				  union  perf_event *event, struct perf_sample *sample,
				  struct evsel *evsel, struct machine *machine)
{
	/* We know evsel != NULL. */
	u64 sample_type = evsel->core.attr.sample_type;
	u64 read_format = evsel->core.attr.read_format;

	/* Standard sample delivery. */
	if (!(sample_type & PERF_SAMPLE_READ))
		return tool->sample(tool, event, sample, evsel, machine);

	/* For PERF_SAMPLE_READ we have either single or group mode. */
	if (read_format & PERF_FORMAT_GROUP)
		return deliver_sample_group(evlist, tool, event, sample,
					    machine);
	else
		return deliver_sample_value(evlist, tool, event, sample,
					    &sample->read.one, machine);
}

static int machines__deliver_event(struct machines *machines,
				   struct evlist *evlist,
				   union perf_event *event,
				   struct perf_sample *sample,
				   struct perf_tool *tool, u64 file_offset)
{
	struct evsel *evsel;
	struct machine *machine;

	dump_event(evlist, event, file_offset, sample);

	evsel = evlist__id2evsel(evlist, sample->id);

	machine = machines__find_for_cpumode(machines, event, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		if (evsel == NULL) {
			++evlist->stats.nr_unknown_id;
			return 0;
		}
		dump_sample(evsel, event, sample);
		if (machine == NULL) {
			++evlist->stats.nr_unprocessable_samples;
			return 0;
		}
		return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
	case PERF_RECORD_MMAP:
		return tool->mmap(tool, event, sample, machine);
	case PERF_RECORD_MMAP2:
		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
			++evlist->stats.nr_proc_map_timeout;
		return tool->mmap2(tool, event, sample, machine);
	case PERF_RECORD_COMM:
		return tool->comm(tool, event, sample, machine);
	case PERF_RECORD_NAMESPACES:
		return tool->namespaces(tool, event, sample, machine);
	case PERF_RECORD_CGROUP:
		return tool->cgroup(tool, event, sample, machine);
	case PERF_RECORD_FORK:
		return tool->fork(tool, event, sample, machine);
	case PERF_RECORD_EXIT:
		return tool->exit(tool, event, sample, machine);
	case PERF_RECORD_LOST:
		if (tool->lost == perf_event__process_lost)
			evlist->stats.total_lost += event->lost.lost;
		return tool->lost(tool, event, sample, machine);
	case PERF_RECORD_LOST_SAMPLES:
		if (tool->lost_samples == perf_event__process_lost_samples)
			evlist->stats.total_lost_samples += event->lost_samples.lost;
		return tool->lost_samples(tool, event, sample, machine);
	case PERF_RECORD_READ:
		dump_read(evsel, event);
		return tool->read(tool, event, sample, evsel, machine);
	case PERF_RECORD_THROTTLE:
		return tool->throttle(tool, event, sample, machine);
	case PERF_RECORD_UNTHROTTLE:
		return tool->unthrottle(tool, event, sample, machine);
	case PERF_RECORD_AUX:
		if (tool->aux == perf_event__process_aux) {
			if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
				evlist->stats.total_aux_lost += 1;
			if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
				evlist->stats.total_aux_partial += 1;
		}
		return tool->aux(tool, event, sample, machine);
	case PERF_RECORD_ITRACE_START:
		return tool->itrace_start(tool, event, sample, machine);
	case PERF_RECORD_SWITCH:
	case PERF_RECORD_SWITCH_CPU_WIDE:
		return tool->context_switch(tool, event, sample, machine);
	case PERF_RECORD_KSYMBOL:
		return tool->ksymbol(tool, event, sample, machine);
	case PERF_RECORD_BPF_EVENT:
		return tool->bpf(tool, event, sample, machine);
	case PERF_RECORD_TEXT_POKE:
		return tool->text_poke(tool, event, sample, machine);
	default:
		++evlist->stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__deliver_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret = evlist__parse_sample(session->evlist, event, &sample);

	if (ret) {
		pr_err("Can't parse sample, err = %d\n", ret);
		return ret;
	}

	ret = auxtrace__process_event(session, event, &sample, tool);
	if (ret < 0)
		return ret;
	if (ret > 0)
		return 0;

	ret = machines__deliver_event(&session->machines, session->evlist,
				      event, &sample, tool, file_offset);

	if (dump_trace && sample.aux_sample.size)
		auxtrace__dump_auxtrace_sample(session, &sample);

	return ret;
}

static s64 perf_session__process_user_event(struct perf_session *session,
					    union perf_event *event,
					    u64 file_offset)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct perf_sample sample = { .time = 0, };
	int fd = perf_data__fd(session->data);
	int err;

	if (event->header.type != PERF_RECORD_COMPRESSED ||
	    tool->compressed == perf_session__process_compressed_event_stub)
		dump_event(session->evlist, event, file_offset, &sample);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		err = tool->attr(tool, event, &session->evlist);
		if (err == 0) {
			perf_session__set_id_hdr_size(session);
			perf_session__set_comm_exec(session);
		}
		return err;
	case PERF_RECORD_EVENT_UPDATE:
		return tool->event_update(tool, event, &session->evlist);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		/*
		 * Deprecated, but we need to handle it for the sake
		 * of old data files created in pipe mode.
		 */
		return 0;
	case PERF_RECORD_HEADER_TRACING_DATA:
		/*
		 * Setup for reading amidst mmap, but only when we
		 * are in 'file' mode. The 'pipe' fd is in proper
		 * place already.
		 */
		if (!perf_data__is_pipe(session->data))
			lseek(fd, file_offset, SEEK_SET);
		return tool->tracing_data(session, event);
	case PERF_RECORD_HEADER_BUILD_ID:
		return tool->build_id(session, event);
	case PERF_RECORD_FINISHED_ROUND:
		return tool->finished_round(tool, event, oe);
	case PERF_RECORD_ID_INDEX:
		return tool->id_index(session, event);
	case PERF_RECORD_AUXTRACE_INFO:
		return tool->auxtrace_info(session, event);
	case PERF_RECORD_AUXTRACE:
		/* setup for reading amidst mmap */
		lseek(fd, file_offset + event->header.size, SEEK_SET);
		return tool->auxtrace(session, event);
	case PERF_RECORD_AUXTRACE_ERROR:
		perf_session__auxtrace_error_inc(session, event);
		return tool->auxtrace_error(session, event);
	case PERF_RECORD_THREAD_MAP:
		return tool->thread_map(session, event);
	case PERF_RECORD_CPU_MAP:
		return tool->cpu_map(session, event);
	case PERF_RECORD_STAT_CONFIG:
		return tool->stat_config(session, event);
	case PERF_RECORD_STAT:
		return tool->stat(session, event);
	case PERF_RECORD_STAT_ROUND:
		return tool->stat_round(session, event);
	case PERF_RECORD_TIME_CONV:
		session->time_conv = event->time_conv;
		return tool->time_conv(session, event);
	case PERF_RECORD_HEADER_FEATURE:
		return tool->feature(session, event);
	case PERF_RECORD_COMPRESSED:
		err = tool->compressed(session, event, file_offset);
		if (err)
			dump_event(session->evlist, event, file_offset, &sample);
		return err;
	default:
		return -EINVAL;
	}
}

int perf_session__deliver_synth_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample)
{
	struct evlist *evlist = session->evlist;
	struct perf_tool *tool = session->tool;

	events_stats__inc(&evlist->stats, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, 0);

	return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
}

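/*
 * Byte-swap an event recorded with the opposite endianness, using the
 * per-type handler from perf_event__swap_ops (if one exists).
 */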
static void event_swap(union perf_event *event, bool sample_id_all)
{
	perf_event__swap_op swap;

	swap = perf_event__swap_ops[event->header.type];
	if (swap)
		swap(event, sample_id_all);
}

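/*
 * Read a single event at an arbitrary file offset without disturbing
 * the main processing loop: use the single mmap directly when possible,
 * otherwise seek and read into the caller-supplied buffer.
 */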
int perf_session__peek_event(struct perf_session *session, off_t file_offset,
			     void *buf, size_t buf_sz,
			     union perf_event **event_ptr,
			     struct perf_sample *sample)
{
	union perf_event *event;
	size_t hdr_sz, rest;
	int fd;

	if (session->one_mmap && !session->header.needs_swap) {
		event = file_offset - session->one_mmap_offset +
			session->one_mmap_addr;
		goto out_parse_sample;
	}

	if (perf_data__is_pipe(session->data))
		return -1;

	fd = perf_data__fd(session->data);
	hdr_sz = sizeof(struct perf_event_header);

	if (buf_sz < hdr_sz)
		return -1;

	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
		return -1;

	event = (union perf_event *)buf;

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (event->header.size < hdr_sz || event->header.size > buf_sz)
		return -1;

	buf += hdr_sz;
	rest = event->header.size - hdr_sz;

	if (readn(fd, buf, rest) != (ssize_t)rest)
		return -1;

	if (session->header.needs_swap)
		event_swap(event, evlist__sample_id_all(session->evlist));

out_parse_sample:

	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
	    evlist__parse_sample(session->evlist, event, sample))
		return -1;

	*event_ptr = event;

	return 0;
}

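/*
 * Walk all events in [offset, offset + size), invoking cb for each one.
 * The AUX area payload following a PERF_RECORD_AUXTRACE event is not an
 * event stream, so it is skipped over.
 */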
int perf_session__peek_events(struct perf_session *session, u64 offset,
			      u64 size, peek_events_cb_t cb, void *data)
{
	u64 max_offset = offset + size;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *event;
	int err;

	do {
		err = perf_session__peek_event(session, offset, buf,
					       PERF_SAMPLE_MAX_SIZE, &event,
					       NULL);
		if (err)
			return err;

		err = cb(session, event, offset, data);
		if (err)
			return err;

		offset += event->header.size;
		if (event->header.type == PERF_RECORD_AUXTRACE)
			offset += event->auxtrace.size;

	} while (offset < max_offset);

	return err;
}

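/*
 * Central per-event dispatch: byte-swap if required, account the event
 * in the evlist stats, hand user events off for immediate processing,
 * and either queue the rest for ordered delivery or deliver directly.
 */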
static s64 perf_session__process_event(struct perf_session *session,
				       union perf_event *event, u64 file_offset)
{
	struct evlist *evlist = session->evlist;
	struct perf_tool *tool = session->tool;
	int ret;

	if (session->header.needs_swap)
		event_swap(event, evlist__sample_id_all(evlist));

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	events_stats__inc(&evlist->stats, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, file_offset);

	if (tool->ordered_events) {
		u64 timestamp = -1ULL;

		ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
		if (ret && ret != -1)
			return ret;

		ret = perf_session__queue_event(session, event, timestamp, file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session__deliver_event(session, event, tool, file_offset);
}

void perf_event_header__bswap(struct perf_event_header *hdr)
{
	hdr->type = bswap_32(hdr->type);
	hdr->misc = bswap_16(hdr->misc);
	hdr->size = bswap_16(hdr->size);
}

struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
	return machine__findnew_thread(&session->machines.host, -1, pid);
}

int perf_session__register_idle_thread(struct perf_session *session)
{
	struct thread *thread = machine__idle_thread(&session->machines.host);

	/* machine__idle_thread() got the thread, so put it */
	thread__put(thread);
	return thread ? 0 : -1;
}

static void
perf_session__warn_order(const struct perf_session *session)
{
	const struct ordered_events *oe = &session->ordered_events;
	struct evsel *evsel;
	bool should_warn = true;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.write_backward)
			should_warn = false;
	}

	if (!should_warn)
		return;
	if (oe->nr_unordered_events != 0)
		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
}

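/*
 * Summarize anomalies seen while processing, as collected in the evlist
 * stats: lost events and samples, AUX data gaps, unknown events,
 * invalid callchains, proc map timeouts, etc.
 */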
static void perf_session__warn_about_errors(const struct perf_session *session)
{
	const struct events_stats *stats = &session->evlist->stats;

	if (session->tool->lost == perf_event__process_lost &&
	    stats->nr_events[PERF_RECORD_LOST] != 0) {
		ui__warning("Processed %d events and lost %d chunks!\n\n"
			    "Check IO/CPU overload!\n\n",
			    stats->nr_events[0],
			    stats->nr_events[PERF_RECORD_LOST]);
	}

	if (session->tool->lost_samples == perf_event__process_lost_samples) {
		double drop_rate;

		drop_rate = (double)stats->total_lost_samples /
			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
		if (drop_rate > 0.05) {
			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
				    drop_rate * 100.0);
		}
	}

	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_lost != 0) {
		ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
			    stats->total_aux_lost,
			    stats->nr_events[PERF_RECORD_AUX]);
	}

	if (session->tool->aux == perf_event__process_aux &&
	    stats->total_aux_partial != 0) {
		bool vmm_exclusive = false;

		(void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
		                       &vmm_exclusive);

		ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
		            "Are you running a KVM guest in the background?%s\n\n",
			    stats->total_aux_partial,
			    stats->nr_events[PERF_RECORD_AUX],
			    vmm_exclusive ?
			    "\nReloading kvm_intel module with vmm_exclusive=0\n"
			    "will reduce the gaps to only guest's timeslices." :
			    "");
	}

	if (stats->nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    stats->nr_unknown_events);
	}

	if (stats->nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    stats->nr_unknown_id);
	}

	if (stats->nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    stats->nr_invalid_chains,
			    stats->nr_events[PERF_RECORD_SAMPLE]);
	}

	if (stats->nr_unprocessable_samples != 0) {
		ui__warning("%u unprocessable samples recorded.\n"
			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
			    stats->nr_unprocessable_samples);
	}

	perf_session__warn_order(session);

	events_stats__auxtrace_error_warn(stats);

	if (stats->nr_proc_map_timeout != 0) {
		ui__warning("%d map information files for pre-existing threads were\n"
			    "not processed; if there are samples for those addresses,\n"
			    "they will not be resolved. You may find out which threads\n"
			    "these are by running with -v and redirecting the output\n"
			    "to a file.\n"
			    "Is the time limit to process proc maps too short?\n"
			    "Increase it with --proc-map-timeout.\n",
			    stats->nr_proc_map_timeout);
	}
}

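/*
 * Flush the call/return stacks buffered per thread (see thread-stack.c)
 * once the end of the event stream has been reached.
 */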
static int perf_session__flush_thread_stack(struct thread *thread,
					    void *p __maybe_unused)
{
	return thread_stack__flush(thread);
}

static int perf_session__flush_thread_stacks(struct perf_session *session)
{
	return machines__for_each_thread(&session->machines,
					 perf_session__flush_thread_stack,
					 NULL);
}

volatile int session_done;

static int __perf_session__process_decomp_events(struct perf_session *session);

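/*
 * Pipe mode: the total data size is not known up front, so read one
 * event header at a time, growing the buffer whenever an event is
 * larger than anything seen so far.
 */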
static int __perf_session__process_pipe_events(struct perf_session *session)
{
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	union perf_event *event;
	uint32_t size, cur_size = 0;
	void *buf = NULL;
	s64 skip = 0;
	u64 head;
	ssize_t err;
	void *p;

	perf_tool__fill_defaults(tool);

	head = 0;
	cur_size = sizeof(union perf_event);

	buf = malloc(cur_size);
	if (!buf)
		return -errno;
	ordered_events__set_copy_on_queue(oe, true);
more:
	event = buf;
	err = perf_data__read(session->data, event,
			      sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	size = event->header.size;
	if (size < sizeof(struct perf_event_header)) {
		pr_err("bad event header size\n");
		goto out_err;
	}

	if (size > cur_size) {
		void *new = realloc(buf, size);
		if (!new) {
			pr_err("failed to allocate memory to read event\n");
			goto out_err;
		}
		buf = new;
		cur_size = size;
		event = buf;
	}
	p = event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = perf_data__read(session->data, p,
				      size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if ((skip = perf_session__process_event(session, event, head)) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
		       head, event->header.size, event->header.type);
		err = -EINVAL;
		goto out_err;
	}

	head += size;

	if (skip > 0)
		head += skip;

	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out_err;

	if (!session_done())
		goto more;
done:
	/* do the final flush for ordered samples */
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
out_err:
	free(buf);
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
	ordered_events__free(&session->ordered_events);
	auxtrace__free_events(session);
	return err;
}

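/*
 * Return the event at 'head' if it lies completely inside the mapped
 * buffer, NULL if there is not even room for its header (the caller
 * then remaps and retries), or the caller-supplied 'error' marker if
 * the header claims a size that does not fit.
 */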
static union perf_event *
prefetch_event(char *buf, u64 head, size_t mmap_size,
	       bool needs_swap, union perf_event *error)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);
	if (needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size <= mmap_size)
		return event;

	/* We're not fetching the event so swap back again */
	if (needs_swap)
		perf_event_header__bswap(&event->header);

	pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
		 " fuzzed or compressed perf.data?\n", __func__, head, event->header.size, mmap_size);

	return error;
}

static union perf_event *
fetch_mmaped_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
{
	return prefetch_event(buf, head, mmap_size, needs_swap, ERR_PTR(-EINVAL));
}

static union perf_event *
fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap)
{
	return prefetch_event(buf, head, mmap_size, needs_swap, NULL);
}

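/*
 * Drain the events sitting in the most recent decompressed buffer,
 * feeding each one through the regular event processing path.
 */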
static int __perf_session__process_decomp_events(struct perf_session *session)
{
	s64 skip;
	u64 size, file_pos = 0;
	struct decomp *decomp = session->decomp_last;

	if (!decomp)
		return 0;

	while (decomp->head < decomp->size && !session_done()) {
		union perf_event *event = fetch_decomp_event(decomp->head, decomp->size, decomp->data,
							     session->header.needs_swap);

		if (!event)
			break;

		size = event->header.size;

		if (size < sizeof(struct perf_event_header) ||
		    (skip = perf_session__process_event(session, event, file_pos)) < 0) {
			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
				decomp->file_pos + decomp->head, event->header.size, event->header.type);
			return -EINVAL;
		}

		if (skip)
			size += skip;

		decomp->head += size;
	}

	return 0;
}

/*
 * On 64bit we can mmap the data file in one go. No need for tiny mmap
 * slices. On 32bit we use 32MB.
 */
#if BITS_PER_LONG == 64
#define MMAP_SIZE ULLONG_MAX
#define NUM_MMAPS 1
#else
#define MMAP_SIZE (32 * 1024 * 1024ULL)
#define NUM_MMAPS 128
#endif

struct reader;

typedef s64 (*reader_cb_t)(struct perf_session *session,
			   union perf_event *event,
			   u64 file_offset);

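/*
 * Describes one pass over the on-disk data: the fd to read, the window
 * of data to process, and the callback invoked for each event found.
 */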
struct reader {
	int		 fd;
	u64		 data_size;
	u64		 data_offset;
	reader_cb_t	 process;
	bool		 in_place_update;
};

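/*
 * Core of file-mode processing: mmap a window of the data file, walk
 * the events in it, and remap further into the file when an event
 * would cross the end of the current window.
 */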
static int
reader__process_events(struct reader *rd, struct perf_session *session,
		       struct ui_progress *prog)
{
	u64 data_size = rd->data_size;
	u64 head, page_offset, file_offset, file_pos, size;
	int err = 0, mmap_prot, mmap_flags, map_idx = 0;
	size_t	mmap_size;
	char *buf, *mmaps[NUM_MMAPS];
	union perf_event *event;
	s64 skip;

	page_offset = page_size * (rd->data_offset / page_size);
	file_offset = page_offset;
	head = rd->data_offset - page_offset;

	ui_progress__init_size(prog, data_size, "Processing events...");

	data_size += rd->data_offset;

	mmap_size = MMAP_SIZE;
	if (mmap_size > data_size) {
		mmap_size = data_size;
		session->one_mmap = true;
	}

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (rd->in_place_update) {
		mmap_prot  |= PROT_WRITE;
	} else if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;
	if (session->one_mmap) {
		session->one_mmap_addr = buf;
		session->one_mmap_offset = file_offset;
	}

more:
	event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap);
	if (IS_ERR(event))
		return PTR_ERR(event);

	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	skip = -EINVAL;

	if (size < sizeof(struct perf_event_header) ||
	    (skip = rd->process(session, event, file_pos)) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
		       file_offset + head, event->header.size,
		       event->header.type, strerror(-skip));
		err = skip;
		goto out;
	}

	if (skip)
		size += skip;

	head += size;
	file_pos += size;

	err = __perf_session__process_decomp_events(session);
	if (err)
		goto out;

	ui_progress__update(prog, size);

	if (session_done())
		goto out;

	if (file_pos < data_size)
		goto more;

out:
	return err;
}

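/* Default reader callback: feed each event into the normal session pipeline. */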
static s64 process_simple(struct perf_session *session,
			  union perf_event *event,
			  u64 file_offset)
{
	return perf_session__process_event(session, event, file_offset);
}

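/*
 * File-mode entry point: run the reader over the whole data section,
 * then do the final flushes for ordered events, AUX data and thread
 * stacks.
 */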
static int __perf_session__process_events(struct perf_session *session)
{
	struct reader rd = {
		.fd		= perf_data__fd(session->data),
		.data_size	= session->header.data_size,
		.data_offset	= session->header.data_offset,
		.process	= process_simple,
		.in_place_update = session->data->in_place_update,
	};
	struct ordered_events *oe = &session->ordered_events;
	struct perf_tool *tool = session->tool;
	struct ui_progress prog;
	int err;

	perf_tool__fill_defaults(tool);

	if (rd.data_size == 0)
		return -1;

	ui_progress__init_size(&prog, rd.data_size, "Processing events...");

	err = reader__process_events(&rd, session, &prog);
	if (err)
		goto out_err;
	/* do the final flush for ordered samples */
	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
	if (err)
		goto out_err;
	err = auxtrace__flush_events(session, tool);
	if (err)
		goto out_err;
	err = perf_session__flush_thread_stacks(session);
out_err:
	ui_progress__finish();
	if (!tool->no_warn)
		perf_session__warn_about_errors(session);
	/*
	 * We may be switching perf.data output, so make ordered_events
	 * reusable.
	 */
	ordered_events__reinit(&session->ordered_events);
	auxtrace__free_events(session);
	session->one_mmap = false;
	return err;
}

int perf_session__process_events(struct perf_session *session)
{
	if (perf_session__register_idle_thread(session) < 0)
		return -ENOMEM;

	if (perf_data__is_pipe(session->data))
		return __perf_session__process_pipe_events(session);

	return __perf_session__process_events(session);
}

bool perf_session__has_traces(struct perf_session *session, const char *msg)
{
	struct evsel *evsel;

	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT)
			return true;
	}

	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
	return false;
}

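/*
 * Remember a kallsyms reference symbol (e.g. "_text") and its address
 * in the kernel map, so that kernel symbol addresses can later be
 * checked for relocation. A trailing ']' is stripped from the name.
 */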
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
	char *bracket;
	struct ref_reloc_sym *ref;
	struct kmap *kmap;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	kmap = map__kmap(map);
	if (kmap)
		kmap->ref_reloc_sym = ref;

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
{
	return machines__fprintf_dsos(&session->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
					  bool (skip)(struct dso *dso, int parm), int parm)
{
	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
				       bool skip_empty)
{
	size_t ret;
	const char *msg = "";

	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
		msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";

	ret = fprintf(fp, "\nAggregated stats:%s\n", msg);

	ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty);
	return ret;
}

size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
	/*
	 * FIXME: Here we have to actually print all the machines in this
	 * session, not just the host...
	 */
	return machine__fprintf(&session->machines.host, fp);
}

struct evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct evsel *pos;

	evlist__for_each_entry(session->evlist, pos) {
		if (pos->core.attr.type == type)
			return pos;
	}
	return NULL;
}

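/*
 * Turn a user-supplied CPU list string into a bitmap, after checking
 * that the recorded events actually carry PERF_SAMPLE_CPU and that
 * every requested CPU is within range.
 */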
int perf_session__cpu_bitmap(struct perf_session *session,
			     const char *cpu_list, unsigned long *cpu_bitmap)
{
	int i, err = -1;
	struct perf_cpu_map *map;
	int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);

	for (i = 0; i < PERF_TYPE_MAX; ++i) {
		struct evsel *evsel;

		evsel = perf_session__find_first_evtype(session, i);
		if (!evsel)
			continue;

		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CPU)) {
			pr_err("File does not contain CPU events. "
			       "Remove -C option to proceed.\n");
			return -1;
		}
	}

	map = perf_cpu_map__new(cpu_list);
	if (map == NULL) {
		pr_err("Invalid cpu_list\n");
		return -1;
	}

	for (i = 0; i < map->nr; i++) {
		int cpu = map->map[i];

		if (cpu >= nr_cpus) {
			pr_err("Requested CPU %d too large. "
			       "Consider raising MAX_NR_CPUS\n", cpu);
			goto out_delete_map;
		}

		set_bit(cpu, cpu_bitmap);
	}

	err = 0;

out_delete_map:
	perf_cpu_map__put(map);
	return err;
}

void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
				bool full)
{
	if (session == NULL || fp == NULL)
		return;

	fprintf(fp, "# ========\n");
	perf_header__fprintf_info(session, fp, full);
	fprintf(fp, "# ========\n#\n");
}

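/*
 * Process a PERF_RECORD_ID_INDEX event: copy each entry's idx, cpu and
 * tid into the matching perf_sample_id, after bounds-checking the
 * entry count against the event size.
 */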
int perf_event__process_id_index(struct perf_session *session,
				 union perf_event *event)
{
	struct evlist *evlist = session->evlist;
	struct perf_record_id_index *ie = &event->id_index;
	size_t i, nr, max_nr;

	max_nr = (ie->header.size - sizeof(struct perf_record_id_index)) /
		 sizeof(struct id_index_entry);
	nr = ie->nr;
	if (nr > max_nr)
		return -EINVAL;

	if (dump_trace)
		fprintf(stdout, " nr: %zu\n", nr);

	for (i = 0; i < nr; i++) {
		struct id_index_entry *e = &ie->entries[i];
		struct perf_sample_id *sid;

		if (dump_trace) {
			fprintf(stdout,	" ... id: %"PRI_lu64, e->id);
			fprintf(stdout,	"  idx: %"PRI_lu64, e->idx);
			fprintf(stdout,	"  cpu: %"PRI_ld64, e->cpu);
			fprintf(stdout,	"  tid: %"PRI_ld64"\n", e->tid);
		}

		sid = evlist__id2sid(evlist, e->id);
		if (!sid)
			return -ENOENT;
		sid->idx = e->idx;
		sid->cpu = e->cpu;
		sid->tid = e->tid;
	}
	return 0;
}