#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
#include "util.h"
#include "cpumap.h"
#include "event-parse.h"
#include "perf_regs.h"
#include "unwind.h"

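/*
 * Open the session input: the perf.data file named in self->filename, or
 * stdin when the name is "-". For regular files this also validates
 * ownership, rejects empty files, reads the header and checks that
 * sample_type and sample_id_all are consistent across the recorded events.
 */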
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0)
			pr_err("incompatible file format (rerun with -v to learn more)");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -errno;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format (rerun with -v to learn more)");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_type(self->evlist)) {
		pr_err("non matching sample_type");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
		pr_err("non matching sample_id_all");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

void perf_session__set_id_hdr_size(struct perf_session *session)
{
	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);

	session->host_machine.id_hdr_size = id_hdr_size;
	machines__set_id_hdr_size(&session->machines, id_hdr_size);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_tool *tool)
{
	struct perf_session *self;
	struct stat st;
	size_t len;

	if (!filename || !strlen(filename)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			filename = "-";
		else
			filename = "perf.data";
	}

	len = strlen(filename);
	self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
	hists__init(&self->hists);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__set_id_hdr_size(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (tool && tool->ordering_requires_timestamps &&
	    tool->ordered_samples && !perf_evlist__sample_id_all(self->evlist)) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		tool->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void machine__delete_dead_threads(struct machine *machine)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_dead_threads(struct perf_session *session)
{
	machine__delete_dead_threads(&session->host_machine);
}

static void machine__delete_threads(struct machine *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *session)
{
	machine__delete_threads(&session->host_machine);
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void machine__remove_thread(struct machine *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

static const u8 cpumodes[] = {
	PERF_RECORD_MISC_USER,
	PERF_RECORD_MISC_KERNEL,
	PERF_RECORD_MISC_GUEST_USER,
	PERF_RECORD_MISC_GUEST_KERNEL
};
#define NCPUMODES (sizeof(cpumodes)/sizeof(u8))

static void ip__resolve_ams(struct machine *self, struct thread *thread,
			    struct addr_map_symbol *ams,
			    u64 ip)
{
	struct addr_location al;
	size_t i;
	u8 m;

	memset(&al, 0, sizeof(al));

	for (i = 0; i < NCPUMODES; i++) {
		m = cpumodes[i];
		/*
		 * We cannot use the header.misc hint to determine whether a
		 * branch stack address is user, kernel, guest or hypervisor.
		 * Branches may straddle the kernel/user/hypervisor boundaries,
		 * so we try each cpumode in turn until we find a match;
		 * otherwise the symbol remains unknown.
		 */
		thread__find_addr_location(thread, self, m, MAP__FUNCTION,
				ip, &al, NULL);
		if (al.sym)
			goto found;
	}
found:
	ams->addr = ip;
	ams->al_addr = al.addr;
	ams->sym = al.sym;
	ams->map = al.map;
}

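/*
 * Resolve the from/to address of each branch stack entry to a map and
 * symbol. Returns a calloc'ed array of bs->nr branch_info entries that the
 * caller must free, or NULL on allocation failure.
 */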
struct branch_info *machine__resolve_bstack(struct machine *self,
					    struct thread *thr,
					    struct branch_stack *bs)
{
	struct branch_info *bi;
	unsigned int i;

	bi = calloc(bs->nr, sizeof(struct branch_info));
	if (!bi)
		return NULL;

	for (i = 0; i < bs->nr; i++) {
		ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to);
		ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from);
		bi[i].flags = bs->entries[i].flags;
	}
	return bi;
}

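/*
 * Walk the PERF_SAMPLE_CALLCHAIN ip array, tracking PERF_CONTEXT_* markers
 * to pick the right cpumode, resolve each address to a map and symbol, and
 * append the result to the global callchain_cursor.
 */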
static int machine__resolve_callchain_sample(struct machine *machine,
					     struct thread *thread,
					     struct ip_callchain *chain,
					     struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&callchain_cursor);

	if (chain->nr > PERF_MAX_STACK_DEPTH) {
		pr_warning("corrupted callchain. skipping...\n");
		return 0;
	}

	for (i = 0; i < chain->nr; i++) {
		u64 ip;
		struct addr_location al;

		if (callchain_param.order == ORDER_CALLEE)
			ip = chain->ips[i];
		else
			ip = chain->ips[chain->nr - i - 1];

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;
				break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;
				break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;
				break;
			default:
				pr_debug("invalid callchain context: "
					 "%"PRId64"\n", (s64) ip);
				/*
				 * It seems the callchain is corrupted.
				 * Discard all.
				 */
				callchain_cursor_reset(&callchain_cursor);
				return 0;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, machine, cpumode,
					   MAP__FUNCTION, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

static int unwind_entry(struct unwind_entry *entry, void *arg)
{
	struct callchain_cursor *cursor = arg;
	return callchain_cursor_append(cursor, entry->ip,
				       entry->map, entry->sym);
}

int machine__resolve_callchain(struct machine *machine,
			       struct perf_evsel *evsel,
			       struct thread *thread,
			       struct perf_sample *sample,
			       struct symbol **parent)
{
	int ret;

	callchain_cursor_reset(&callchain_cursor);

	ret = machine__resolve_callchain_sample(machine, thread,
						sample->callchain, parent);
	if (ret)
		return ret;

	/* Can we do dwarf post unwind? */
	if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
	      (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
		return 0;

	/* Bail out if nothing was captured. */
	if ((!sample->user_regs.regs) ||
	    (!sample->user_stack.size))
		return 0;

	return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
				   thread, evsel->attr.sample_regs_user,
				   sample);
}

static int process_event_synth_tracing_data_stub(union perf_event *event __used,
						 struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_attr_stub(union perf_event *event __used,
					 struct perf_evlist **pevlist __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(struct perf_tool *tool __used,
				     union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct machine *machine __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(struct perf_tool *tool __used,
			      union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct machine *machine __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(struct perf_tool *tool __used,
				       union perf_event *event __used,
				       struct perf_session *perf_session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_type_stub(struct perf_tool *tool __used,
				   union perf_event *event __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event,
				  struct perf_session *session);

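/*
 * Fill every callback the tool left NULL with a safe default, so the
 * event dispatch code can invoke them unconditionally.
 */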
static void perf_tool__fill_defaults(struct perf_tool *tool)
{
	if (tool->sample == NULL)
		tool->sample = process_event_sample_stub;
	if (tool->mmap == NULL)
		tool->mmap = process_event_stub;
	if (tool->comm == NULL)
		tool->comm = process_event_stub;
	if (tool->fork == NULL)
		tool->fork = process_event_stub;
	if (tool->exit == NULL)
		tool->exit = process_event_stub;
	if (tool->lost == NULL)
		tool->lost = perf_event__process_lost;
	if (tool->read == NULL)
		tool->read = process_event_sample_stub;
	if (tool->throttle == NULL)
		tool->throttle = process_event_stub;
	if (tool->unthrottle == NULL)
		tool->unthrottle = process_event_stub;
	if (tool->attr == NULL)
		tool->attr = process_event_synth_attr_stub;
	if (tool->event_type == NULL)
		tool->event_type = process_event_type_stub;
	if (tool->tracing_data == NULL)
		tool->tracing_data = process_event_synth_tracing_data_stub;
	if (tool->build_id == NULL)
		tool->build_id = process_finished_round_stub;
	if (tool->finished_round == NULL) {
		if (tool->ordered_samples)
			tool->finished_round = process_finished_round;
		else
			tool->finished_round = process_finished_round_stub;
	}
}

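/*
 * Byte-swap a buffer in place, one u32 at a time; byte_size is expected to
 * be a multiple of sizeof(u32).
 */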
void mem_bswap_32(void *src, int byte_size)
{
	u32 *m = src;
	while (byte_size > 0) {
		*m = bswap_32(*m);
		byte_size -= sizeof(u32);
		++m;
	}
}
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void swap_sample_id_all(union perf_event *event, void *data)
{
	void *end = (void *) event + event->header.size;
	int size = end - data;

	BUG_ON(size % sizeof(u64));
	mem_bswap_64(data, size);
}

static void perf_event__all64_swap(union perf_event *event,
				   bool sample_id_all __used)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);

	if (sample_id_all) {
		void *data = &event->comm.comm;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__mmap_swap(union perf_event *event,
				  bool sample_id_all)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);

	if (sample_id_all) {
		void *data = &event->mmap.filename;

		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
		swap_sample_id_all(event, data);
	}
}

static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);

	if (sample_id_all)
		swap_sample_id_all(event, &event->fork + 1);
}

static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);

	if (sample_id_all)
		swap_sample_id_all(event, &event->read + 1);
}

static u8 revbyte(u8 b)
{
	int rev = (b >> 4) | ((b & 0xf) << 4);
	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
	return (u8) rev;
}

/*
 * XXX this is a hack to carry the flags bitfield through the endian
 * village. The ABI says:
 *
 * Bit-fields are allocated from right to left (least to most significant)
 * on little-endian implementations and from left to right (most to least
 * significant) on big-endian implementations.
 *
 * The above seems to be byte specific, so we need to reverse each byte
 * of the bitfield. 'Internet' also says this might be implementation
 * specific and we probably need a proper fix: carry the perf_event_attr
 * bitfield flags in a separate FEAT_ section of the data file. Though
 * this seems to work for now.
 */
static void swap_bitfield(u8 *p, unsigned len)
{
	unsigned i;

	for (i = 0; i < len; i++) {
		*p = revbyte(*p);
		p++;
	}
}

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);
	attr->config		= bswap_64(attr->config);
	attr->sample_period	= bswap_64(attr->sample_period);
	attr->sample_type	= bswap_64(attr->sample_type);
	attr->read_format	= bswap_64(attr->read_format);
	attr->wakeup_events	= bswap_32(attr->wakeup_events);
	attr->bp_type		= bswap_32(attr->bp_type);
	attr->bp_addr		= bswap_64(attr->bp_addr);
	attr->bp_len		= bswap_64(attr->bp_len);

	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
}

static void perf_event__hdr_attr_swap(union perf_event *event,
				      bool sample_id_all __used)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event,
					bool sample_id_all __used)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event,
					  bool sample_id_all __used)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event,
				    bool sample_id_all);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_tool *tool,
				      u64 file_offset);

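/*
 * Deliver every queued event with a timestamp at or below os->next_flush,
 * in timestamp order, recycling the delivered sample_queue entries onto
 * os->sample_cache.
 */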
static int flush_sample_queue(struct perf_session *s,
			       struct perf_tool *tool)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	unsigned idx = 0, progress_next = os->nr_samples / 16;
	int ret;

	if (!tool->ordered_samples || !limit)
		return 0;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample,
						s->header.needs_swap);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else {
			ret = perf_session_deliver_event(s, iter->event, &sample, tool,
							 iter->file_offset);
			if (ret)
				return ret;
		}

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
		if (++idx >= progress_next) {
			progress_next += os->nr_samples / 16;
			ui_progress__update(idx, os->nr_samples,
					    "Processing time ordered events...");
		}
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}

	os->nr_samples = 0;

	return 0;
}

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event __used,
				  struct perf_session *session)
{
	int ret = flush_sample_queue(session, tool);
	if (!ret)
		session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return ret;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	++os->nr_samples;
	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

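/*
 * Queue one event for time-ordered delivery. Entries are recycled from
 * os->sample_cache when possible, otherwise bump-allocated from the current
 * MAX_SAMPLE_BUFFER-sized chunk; slot 0 of each chunk anchors it on
 * os->to_free so perf_session_free_sample_buffers() can release it later.
 */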
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void branch_stack__printf(struct perf_sample *sample)
{
	uint64_t i;

	printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);

	for (i = 0; i < sample->branch_stack->nr; i++)
		printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n",
			i, sample->branch_stack->entries[i].from,
			sample->branch_stack->entries[i].to);
}

static void regs_dump__printf(u64 mask, u64 *regs)
{
	unsigned rid, i = 0;

	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
		u64 val = regs[i++];

		printf(".... %-5s 0x%" PRIx64 "\n",
		       perf_reg_name(rid), val);
	}
}

static void regs_user__printf(struct perf_sample *sample, u64 mask)
{
	struct regs_dump *user_regs = &sample->user_regs;

	if (user_regs->regs) {
		printf("... user regs: mask 0x%" PRIx64 "\n", mask);
		regs_dump__printf(mask, user_regs->regs);
	}
}

static void stack_user__printf(struct stack_dump *dump)
{
	printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
	       dump->size, dump->offset);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	u64 sample_type = perf_evlist__sample_type(session->evlist);

	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !perf_evlist__sample_id_all(session->evlist)) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
			struct perf_sample *sample)
{
	u64 sample_type;

	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period, sample->addr);

	sample_type = evsel->attr.sample_type;

	if (sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);

	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
		branch_stack__printf(sample);

	if (sample_type & PERF_SAMPLE_REGS_USER)
		regs_user__printf(sample, evsel->attr.sample_regs_user);

	if (sample_type & PERF_SAMPLE_STACK_USER)
		stack_user__printf(&sample->user_stack);
}

static struct machine *
	perf_session__find_machine_for_cpumode(struct perf_session *session,
					       union perf_event *event)
{
	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

	if (perf_guest &&
	    ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
	     (cpumode == PERF_RECORD_MISC_GUEST_USER))) {
		u32 pid;

		if (event->header.type == PERF_RECORD_MMAP)
			pid = event->mmap.pid;
		else
			pid = event->ip.pid;

		return perf_session__findnew_machine(session, pid);
	}

	return perf_session__find_host_machine(session);
}

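/*
 * Dispatch one kernel event to the matching perf_tool callback, picking the
 * target machine (host or guest) from the event's cpumode and accounting
 * per-evsel and per-session event statistics.
 */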
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_tool *tool,
				      u64 file_offset)
{
	struct perf_evsel *evsel;
	struct machine *machine;

	dump_event(session, event, file_offset, sample);

	evsel = perf_evlist__id2evsel(session->evlist, sample->id);
	if (evsel != NULL && event->header.type != PERF_RECORD_SAMPLE) {
		/*
		 * XXX We're leaving PERF_RECORD_SAMPLE unaccounted here
		 * because the tools right now may apply filters, discarding
		 * some of the samples. For consistency, in the future we
		 * should have something like nr_filtered_samples and remove
		 * the sample->period from total_sample_period, etc, KISS for
		 * now tho.
		 *
		 * Also testing against NULL allows us to handle files without
		 * attr.sample_id_all and/or without PERF_SAMPLE_ID. In the
		 * future probably it'll be a good idea to restrict event
		 * processing via perf_session to files with both set.
		 */
		hists__inc_nr_events(&evsel->hists, event->header.type);
	}

	machine = perf_session__find_machine_for_cpumode(session, event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(evsel, event, sample);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return 0;
		}
		if (machine == NULL) {
			++session->hists.stats.nr_unprocessable_samples;
			return 0;
		}
		return tool->sample(tool, event, sample, evsel, machine);
	case PERF_RECORD_MMAP:
		return tool->mmap(tool, event, sample, machine);
	case PERF_RECORD_COMM:
		return tool->comm(tool, event, sample, machine);
	case PERF_RECORD_FORK:
		return tool->fork(tool, event, sample, machine);
	case PERF_RECORD_EXIT:
		return tool->exit(tool, event, sample, machine);
	case PERF_RECORD_LOST:
		if (tool->lost == perf_event__process_lost)
			session->hists.stats.total_lost += event->lost.lost;
		return tool->lost(tool, event, sample, machine);
	case PERF_RECORD_READ:
		return tool->read(tool, event, sample, evsel, machine);
	case PERF_RECORD_THROTTLE:
		return tool->throttle(tool, event, sample, machine);
	case PERF_RECORD_UNTHROTTLE:
		return tool->unthrottle(tool, event, sample, machine);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_tool *tool, u64 file_offset)
{
	int err;

	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		err = tool->attr(event, &session->evlist);
		if (err == 0)
			perf_session__set_id_hdr_size(session);
		return err;
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return tool->event_type(tool, event);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return tool->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return tool->build_id(tool, event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return tool->finished_round(tool, event, session);
	default:
		return -EINVAL;
	}
}

static void event_swap(union perf_event *event, bool sample_id_all)
{
	perf_event__swap_op swap;

	swap = perf_event__swap_ops[event->header.type];
	if (swap)
		swap(event, sample_id_all);
}

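/*
 * Entry point for a single event: byte-swap it if the file and host differ
 * in endianness, account it, hand user (synthesized) events straight to
 * perf_session__process_user_event(), and either queue kernel events for
 * time-ordered delivery or deliver them immediately.
 */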
static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	if (session->header.needs_swap)
		event_swap(event, perf_evlist__sample_id_all(session->evlist));

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, tool, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_evlist__parse_sample(session->evlist, event, &sample,
					session->header.needs_swap);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (tool->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, tool,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
	return machine__findnew_thread(&session->host_machine, pid);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_tool *tool)
{
	if (tool->lost == perf_event__process_lost &&
	    session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
		ui__warning("Processed %d events and lost %d chunks!\n\n"
			    "Check IO/CPU overload!\n\n",
			    session->hists.stats.nr_events[0],
			    session->hists.stats.nr_events[PERF_RECORD_LOST]);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}

	if (session->hists.stats.nr_unprocessable_samples != 0) {
		ui__warning("%u unprocessable samples recorded.\n"
			    "Do you have a KVM guest running and not using 'perf kvm'?\n",
			    session->hists.stats.nr_unprocessable_samples);
	}
}

1255 1256 1257 1258
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

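/*
 * Process an event stream arriving over a pipe (e.g. "perf record -o - |
 * perf inject"): read each event header, grow the buffer as needed, read
 * the payload and process it, until EOF or session_done().
 */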
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_tool *tool)
{
	union perf_event *event;
	uint32_t size, cur_size = 0;
	void *buf = NULL;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_tool__fill_defaults(tool);

	head = 0;
	cur_size = sizeof(union perf_event);

	buf = malloc(cur_size);
	if (!buf)
		return -errno;
more:
	event = buf;
	err = readn(self->fd, event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event->header);

	size = event->header.size;
	if (size == 0)
		size = 8;

	if (size > cur_size) {
		void *new = realloc(buf, size);
		if (!new) {
			pr_err("failed to allocate memory to read event\n");
			goto out_err;
		}
		buf = new;
		cur_size = size;
		event = buf;
	}
	p = event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if ((skip = perf_session__process_event(self, event, tool, head)) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
		       head, event->header.size, event->header.type);
		err = -EINVAL;
		goto out_err;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	free(buf);
	perf_session__warn_about_errors(self, tool);
	perf_session_free_sample_buffers(self);
	return err;
}

static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

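/*
 * Process an on-disk perf.data file by mmaping it in session->mmap_window
 * sized slices, remapping whenever the next event would cross the end of
 * the current slice, and flushing any remaining ordered samples at the end.
 */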
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_tool *tool)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_tool__fill_defaults(tool);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, tool, file_pos) < 0) {
		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
		       file_offset + head, event->header.size,
		       event->header.type);
		err = -EINVAL;
		goto out_err;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(file_pos, file_size,
				    "Processing events...");
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	err = flush_sample_queue(session, tool);
out_err:
	perf_session__warn_about_errors(session, tool);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_tool *tool)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, tool);
	else
		err = __perf_session__process_pipe_events(self, tool);

	return err;
}

bool perf_session__has_traces(struct perf_session *session, const char *msg)
{
	if (!(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

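/*
 * Record a reference symbol (e.g. "_text") and its address in the kmap of
 * every map type, so kernel maps can later be relocated against kallsyms.
 */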
int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
				     const char *symbol_name, u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
	/*
	 * FIXME: Here we have to actually print all the machines in this
	 * session, not just the host...
	 */
	return machine__fprintf(&session->host_machine, fp);
}

void perf_session__remove_thread(struct perf_session *session,
				 struct thread *th)
{
	/*
	 * FIXME: This one makes no sense, we need to remove the thread from
	 * the machine it belongs to, perf_session can have many machines, so
	 * doing it always on ->host_machine is wrong.  Fix when auditing all
	 * the 'perf kvm' code.
	 */
	machine__remove_thread(&session->host_machine, th);
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
			  struct perf_sample *sample, struct machine *machine,
			  int print_sym, int print_dso, int print_symoffset)
{
	struct addr_location al;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, machine, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
			event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {
		if (machine__resolve_callchain(machine, evsel, al.thread,
					       sample, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(&callchain_cursor);

		while (1) {
			node = callchain_cursor_current(&callchain_cursor);
			if (!node)
				break;

			printf("\t%16" PRIx64, node->ip);
			if (print_sym) {
				printf(" ");
				symbol__fprintf_symname(node->sym, stdout);
			}
			if (print_dso) {
				printf(" (");
				map__fprintf_dsoname(node->map, stdout);
				printf(")");
			}
			printf("\n");

			callchain_cursor_advance(&callchain_cursor);
		}

	} else {
		printf("%16" PRIx64, sample->ip);
		if (print_sym) {
			printf(" ");
			if (print_symoffset)
				symbol__fprintf_symname_offs(al.sym, &al,
							     stdout);
			else
				symbol__fprintf_symname(al.sym, stdout);
		}

		if (print_dso) {
			printf(" (");
			map__fprintf_dsoname(al.map, stdout);
			printf(")");
		}
	}
}

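/*
 * Check that every event in the session carries PERF_SAMPLE_CPU, then set
 * a bit in cpu_bitmap for each cpu named in cpu_list.
 */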
int perf_session__cpu_bitmap(struct perf_session *session,
			     const char *cpu_list, unsigned long *cpu_bitmap)
{
	int i;
	struct cpu_map *map;

	for (i = 0; i < PERF_TYPE_MAX; ++i) {
		struct perf_evsel *evsel;

		evsel = perf_session__find_first_evtype(session, i);
		if (!evsel)
			continue;

		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
			pr_err("File does not contain CPU events. "
			       "Remove -c option to proceed.\n");
			return -1;
		}
	}

	map = cpu_map__new(cpu_list);
	if (map == NULL) {
		pr_err("Invalid cpu_list\n");
		return -1;
	}

	for (i = 0; i < map->nr; i++) {
		int cpu = map->map[i];

		if (cpu >= MAX_NR_CPUS) {
			pr_err("Requested CPU %d too large. "
			       "Consider raising MAX_NR_CPUS\n", cpu);
			return -1;
		}

		set_bit(cpu, cpu_bitmap);
	}

	return 0;
}

void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
				bool full)
{
	struct stat st;
	int ret;

	if (session == NULL || fp == NULL)
		return;

	ret = fstat(session->fd, &st);
	if (ret == -1)
		return;

	fprintf(fp, "# ========\n");
	fprintf(fp, "# captured on: %s", ctime(&st.st_ctime));
	perf_header__fprintf_info(session, fp, full);
	fprintf(fp, "# ========\n#\n");
}
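/*
 * Associate each "sys:name" tracepoint in assocs with its handler on the
 * matching evsel. Tracepoints not present in the session are silently
 * skipped; attaching a second handler to the same evsel yields -EEXIST.
 */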
int __perf_session__set_tracepoints_handlers(struct perf_session *session,
					     const struct perf_evsel_str_handler *assocs,
					     size_t nr_assocs)
{
	struct perf_evlist *evlist = session->evlist;
	struct event_format *format;
	struct perf_evsel *evsel;
	char *tracepoint, *name;
	size_t i;
	int err;

	for (i = 0; i < nr_assocs; i++) {
		err = -ENOMEM;
		tracepoint = strdup(assocs[i].name);
		if (tracepoint == NULL)
			goto out;

		err = -ENOENT;
		name = strchr(tracepoint, ':');
		if (name == NULL)
			goto out_free;

		*name++ = '\0';
		format = pevent_find_event_by_name(session->pevent,
						   tracepoint, name);
		if (format == NULL) {
			/*
			 * Adding a handler for an event not in the session,
			 * just ignore it.
			 */
			goto next;
		}

		evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id);
		if (evsel == NULL)
			goto next;

		err = -EEXIST;
		if (evsel->handler.func != NULL)
			goto out_free;
		evsel->handler.func = assocs[i].handler;
next:
		free(tracepoint);
	}

	err = 0;
out:
	return err;

out_free:
	free(tracepoint);
	goto out;
}