session.c 31.4 KB
Newer Older
1 2
#define _FILE_OFFSET_BITS 64

3 4
#include <linux/kernel.h>

5
#include <byteswap.h>
6 7
#include <unistd.h>
#include <sys/types.h>
8
#include <sys/mman.h>
9

10 11
#include "evlist.h"
#include "evsel.h"
12
#include "session.h"
13
#include "sort.h"
14 15
#include "util.h"

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
/*
 * Build session->evlist from the attributes stored in the session header:
 * one evsel per header attribute, with every id of that attribute hashed
 * into the evlist.
 *
 * Returns 0 on success or -ENOMEM, in which case the partially built evlist
 * is deleted and session->evlist is reset to NULL.
 */
static int perf_session__read_evlist(struct perf_session *session)
{
	int attr_idx;

	session->evlist = perf_evlist__new(NULL, NULL);
	if (!session->evlist)
		return -ENOMEM;

	for (attr_idx = 0; attr_idx < session->header.attrs; attr_idx++) {
		struct perf_header_attr *hattr = session->header.attr[attr_idx];
		struct perf_evsel *evsel;
		int id_idx;

		evsel = perf_evsel__new(&hattr->attr, attr_idx);
		if (!evsel)
			goto out_delete_evlist;

		/*
		 * Link the evsel into the list first: should the id table
		 * allocation below fail, perf_evlist__delete() then purges
		 * this entry along with the others.
		 */
		perf_evlist__add(session->evlist, evsel);

		/*
		 * The header carries neither cpu nor thread maps, so the
		 * perf_sample_id table is sized as 1 cpu by hattr->ids
		 * threads.
		 */
		if (perf_evsel__alloc_id(evsel, 1, hattr->ids) != 0)
			goto out_delete_evlist;

		for (id_idx = 0; id_idx < hattr->ids; id_idx++)
			perf_evlist__id_hash(session->evlist, evsel, 0, id_idx,
					     hattr->id[id_idx]);
	}

	return 0;

out_delete_evlist:
	perf_evlist__delete(session->evlist);
	session->evlist = NULL;
	return -ENOMEM;
}

56 57 58 59
/*
 * Open the session input for reading and load its header.
 *
 * A file name of "-" selects pipe mode on stdin; a header read failure is
 * only reported there and 0 is still returned.  A regular file must be
 * non-empty and, unless @force is set, owned by root or by the effective
 * user.
 *
 * Returns 0 on success, the negated open() errno when the file cannot be
 * opened, and -1 for any later failure (the descriptor is then closed and
 * self->fd set to -1).
 */
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		/* errno may have been overwritten while printing; use the copy. */
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	if (perf_session__read_evlist(self) < 0) {
		pr_err("Not enough memory to read the event selector list\n");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

115
static void perf_session__id_header_size(struct perf_session *session)
116
{
117
       struct perf_sample *data;
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
       u64 sample_type = session->sample_type;
       u16 size = 0;

	if (!session->sample_id_all)
		goto out;

       if (sample_type & PERF_SAMPLE_TID)
               size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
               size += sizeof(data->time);

       if (sample_type & PERF_SAMPLE_ID)
               size += sizeof(data->id);

       if (sample_type & PERF_SAMPLE_STREAM_ID)
               size += sizeof(data->stream_id);

       if (sample_type & PERF_SAMPLE_CPU)
               size += sizeof(data->cpu) * 2;
out:
       session->id_hdr_size = size;
}

/*
 * Set the session's sample_id_all flag and refresh the cached id header
 * size, which depends on it.
 */
void perf_session__set_sample_id_all(struct perf_session *session, bool value)
{
	session->sample_id_all = value;
	perf_session__id_header_size(session);
}

148 149 150 151 152
/*
 * Set the session's sample_type and refresh the cached id header size.
 *
 * id_hdr_size is derived from sample_type, so it is recomputed here just as
 * perf_session__set_sample_id_all() does; otherwise it would stay stale
 * until the next update.
 */
void perf_session__set_sample_type(struct perf_session *session, u64 type)
{
	session->sample_type = type;
	perf_session__id_header_size(session);
}

153 154 155 156 157 158 159
/*
 * Reload sample_type and sample_id_all from the session header, then
 * recompute the cached id header size from the new values.
 */
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
	self->sample_id_all = perf_header__sample_id_all(&self->header);
	perf_session__id_header_size(self);
}

160 161
int perf_session__create_kernel_maps(struct perf_session *self)
{
162
	int ret = machine__create_kernel_maps(&self->host_machine);
163 164

	if (ret >= 0)
165
		ret = machines__create_guest_kernel_maps(&self->machines);
166 167 168
	return ret;
}

169 170 171 172 173 174
/* Tear down the kernel maps of the host machine, then of the guest machines. */
static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

175 176 177
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
178
{
179
	size_t len = filename ? strlen(filename) + 1 : 0;
180 181 182 183 184 185
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
186
		goto out_free;
187 188

	memcpy(self->filename, filename, len);
189
	self->threads = RB_ROOT;
190
	INIT_LIST_HEAD(&self->dead_threads);
191
	self->last_match = NULL;
192 193 194 195 196 197 198 199 200
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
201
	self->machines = RB_ROOT;
T
Tom Zanussi 已提交
202
	self->repipe = repipe;
203
	INIT_LIST_HEAD(&self->ordered_samples.samples);
204
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
205
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
206
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
207

208 209 210 211 212 213
	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
214
		 * kernel MMAP event, in perf_event__process_mmap().
215 216 217 218
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}
219

220
	perf_session__update_sample_type(self);
221 222 223 224 225 226 227

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

228 229
out:
	return self;
230
out_free:
231 232
	free(self);
	return NULL;
233 234 235
out_delete:
	perf_session__delete(self);
	return NULL;
236 237
}

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

/* Erase every thread from the session's threads rbtree and delete it. */
static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		/*
		 * Fetch the successor while the node is still linked in:
		 * once erased, its tree links no longer describe the tree.
		 */
		nd = rb_next(nd);
		rb_erase(&t->rb_node, &self->threads);
		thread__delete(t);
	}
}

261 262 263
/*
 * Release a session: header state, kernel maps, dead and live threads and
 * the host machine, then close the session descriptor and free the session
 * itself.
 */
void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}
271

272 273
/*
 * Take @th out of the session's threads rbtree and park it on the
 * dead_threads list; the last_match cache is reset as well.
 */
void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

283 284 285 286 287 288 289 290
/* True when @sym has a name and that name matches the global parent_regex. */
static bool symbol__match_parent_regex(struct symbol *sym)
{
	return sym->name && regexec(&parent_regex, sym->name, 0, NULL, 0) == 0;
}

291 292 293 294
int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
295 296 297
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
298
	int err;
299

300
	callchain_cursor_reset(&self->callchain_cursor);
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

320
		al.filtered = false;
321
		thread__find_addr_location(thread, self, cpumode,
322
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
323 324 325 326
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
327
			if (!symbol_conf.use_callchain)
328 329
				break;
		}
330 331 332 333 334

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
335 336
	}

337
	return 0;
338
}
339

340
/* Default handler for synthesized events: logs "unhandled" and returns 0. */
static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

347
/* Default handler for sample-carrying events: logs "unhandled" and returns 0. */
static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

355
/*
 * finished_round handler installed when ordered samples are not requested:
 * logs "unhandled" and returns 0.
 */
static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

363
/* Forward declaration: referenced by perf_event_ops__fill_defaults() below. */
static int process_finished_round(union perf_event *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

367 368
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
369 370 371 372 373 374 375 376 377 378 379
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
380
		handler->lost = perf_event__process_lost;
381 382 383 384 385 386
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
387
	if (handler->attr == NULL)
388
		handler->attr = process_event_synth_stub;
389
	if (handler->event_type == NULL)
390
		handler->event_type = process_event_synth_stub;
391
	if (handler->tracing_data == NULL)
392
		handler->tracing_data = process_event_synth_stub;
393
	if (handler->build_id == NULL)
394
		handler->build_id = process_event_synth_stub;
395 396 397 398 399 400
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
401 402
}

403 404 405 406 407 408 409 410 411 412 413
/*
 * Byte-swap, in place, each complete 64-bit word in the @byte_size bytes
 * starting at @src.
 *
 * A trailing fragment shorter than 8 bytes is left untouched: swapping it
 * as a full word would read and write past the end of the region.
 */
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size >= (int)sizeof(u64)) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

414
static void perf_event__all64_swap(union perf_event *event)
415
{
416 417
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
418 419
}

420
static void perf_event__comm_swap(union perf_event *event)
421
{
422 423
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
424 425
}

426
static void perf_event__mmap_swap(union perf_event *event)
427
{
428 429 430 431 432
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
433 434
}

435
static void perf_event__task_swap(union perf_event *event)
436
{
437 438 439 440 441
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
442 443
}

444
static void perf_event__read_swap(union perf_event *event)
445
{
446 447 448 449 450 451
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
452 453
}

454
static void perf_event__attr_swap(union perf_event *event)
455 456 457
{
	size_t size;

458 459 460 461 462 463 464 465 466 467
	event->attr.attr.type		= bswap_32(event->attr.attr.type);
	event->attr.attr.size		= bswap_32(event->attr.attr.size);
	event->attr.attr.config		= bswap_64(event->attr.attr.config);
	event->attr.attr.sample_period	= bswap_64(event->attr.attr.sample_period);
	event->attr.attr.sample_type	= bswap_64(event->attr.attr.sample_type);
	event->attr.attr.read_format	= bswap_64(event->attr.attr.read_format);
	event->attr.attr.wakeup_events	= bswap_32(event->attr.attr.wakeup_events);
	event->attr.attr.bp_type	= bswap_32(event->attr.attr.bp_type);
	event->attr.attr.bp_addr	= bswap_64(event->attr.attr.bp_addr);
	event->attr.attr.bp_len		= bswap_64(event->attr.attr.bp_len);
468

469 470 471
	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
472 473
}

474
static void perf_event__event_type_swap(union perf_event *event)
475
{
476 477
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
478 479
}

480
static void perf_event__tracing_data_swap(union perf_event *event)
481
{
482
	event->tracing_data.size = bswap_32(event->tracing_data.size);
483 484
}

485
/* Signature of the per-record-type byte-swap helpers stored in perf_event__swap_ops[]. */
typedef void (*perf_event__swap_op)(union perf_event *event);
486

487 488 489 490 491 492 493 494 495 496 497 498 499
/*
 * Byte-swap helper for each record type, indexed by header.type.  A NULL
 * entry (explicit or left unset) means the record is not swapped here.
 */
static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

502 503
struct sample_queue {
	u64			timestamp;
504
	u64			file_offset;
505
	union perf_event	*event;
506 507 508
	struct list_head	list;
};

509 510 511 512
static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

513
	while (!list_empty(&os->to_free)) {
514 515
		struct sample_queue *sq;

516
		sq = list_entry(os->to_free.next, struct sample_queue, list);
517 518 519 520 521
		list_del(&sq->list);
		free(sq);
	}
}

522
/* Forward declaration: flush_sample_queue() below delivers queued events. */
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);
527

528 529 530
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
531 532
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
533
	struct sample_queue *tmp, *iter;
534
	struct perf_sample sample;
535 536
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
537

538
	if (!ops->ordered_samples || !limit)
539 540 541 542
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
543
			break;
544

545
		perf_session__parse_sample(s, iter->event, &sample);
546 547
		perf_session_deliver_event(s, iter->event, &sample, ops,
					   iter->file_offset);
548

549
		os->last_flush = iter->timestamp;
550
		list_del(&iter->list);
551
		list_add(&iter->list, &os->sample_cache);
552
	}
553 554 555 556 557 558 559

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
560 561
}

562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
/*
 * When perf record finishes a pass on every buffers, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will be all
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every events below timestamp 7
 *      etc...
 */
601
static int process_finished_round(union perf_event *event __used,
602 603 604 605 606 607 608 609 610
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

611
/* The queue is ordered by time */
612
static void __queue_event(struct sample_queue *new, struct perf_session *s)
613
{
614 615 616 617
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;
618

619
	os->last_sample = new;
620

621 622 623
	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
624 625 626 627
		return;
	}

	/*
628 629 630
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
631
	 */
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
654 655
}

656 657
/* Number of struct sample_queue entries that fit in 64 KiB. */
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

658
/*
 * Queue @event for time-ordered delivery.
 *
 * A queue entry is taken from the sample_cache if one is available,
 * otherwise carved out of the current sample buffer, allocating a fresh
 * buffer when needed.
 *
 * Returns 0 on success, -ETIME when the sample carries no usable timestamp
 * (0 or ~0), -EINVAL when the timestamp is older than the last flush, and
 * -ENOMEM when a new buffer cannot be allocated.
 */
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		/*
		 * Entry 0 only serves as the buffer's link on the to_free
		 * list; entry 1 is handed out now, so the next free slot
		 * is 2.
		 */
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}
698

699
static void callchain__printf(struct perf_sample *sample)
700 701
{
	unsigned int i;
702

703
	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);
704 705

	for (i = 0; i < sample->callchain->nr; i++)
706 707
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
708 709
}

710
static void perf_session__print_tstamp(struct perf_session *session,
711
				       union perf_event *event,
712
				       struct perf_sample *sample)
713 714 715 716 717 718 719 720 721 722 723
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
724
		printf("%" PRIu64 " ", sample->time);
725 726
}

727
static void dump_event(struct perf_session *session, union perf_event *event,
728
		       u64 file_offset, struct perf_sample *sample)
729 730 731 732
{
	if (!dump_trace)
		return;

733 734
	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);
735 736 737 738 739 740

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

741
	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
742
	       event->header.size, perf_event__name(event->header.type));
743 744
}

745
static void dump_sample(struct perf_session *session, union perf_event *event,
746
			struct perf_sample *sample)
747
{
748 749 750
	if (!dump_trace)
		return;

751 752 753
	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);
754 755

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
756
		callchain__printf(sample);
757 758
}

759
static int perf_session_deliver_event(struct perf_session *session,
760
				      union perf_event *event,
761
				      struct perf_sample *sample,
762
				      struct perf_event_ops *ops,
763
				      u64 file_offset)
764
{
765 766
	dump_event(session, event, file_offset, sample);

767 768
	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
769
		dump_sample(session, event, sample);
770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
		return ops->sample(event, sample, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

793
static int perf_session__preprocess_sample(struct perf_session *session,
794
					   union perf_event *event, struct perf_sample *sample)
795 796 797 798 799 800 801 802 803 804 805 806 807 808
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

809
/*
 * Dump and dispatch a user-space (synthesized) event to the matching @ops
 * callback.
 *
 * Returns the callback's result, or -EINVAL for a type not handled here.
 */
static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

/*
 * Process one event: byte-swap it if needed, account it, then either hand
 * it to the user-event path, queue it for ordered delivery, or deliver it
 * immediately.
 *
 * Returns -EINVAL for an out-of-range record type, 0 when a sample with an
 * invalid callchain is skipped, and otherwise the result of the queueing or
 * delivery step.
 */
static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	/*
	 * Validate the type before it is used as an index into
	 * perf_event__swap_ops[]: a corrupt type must not read past the
	 * table.
	 */
	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	perf_session__parse_sample(session, event, &sample);

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		/* -ETIME: no usable timestamp, so deliver unordered below. */
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

873 874 875 876 877 878 879
/*
 * Convert an event header to the opposite byte order in place: the two
 * 16-bit fields (size, misc) and the 32-bit type.
 */
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->size = bswap_16(self->size);
	self->misc = bswap_16(self->misc);
	self->type = bswap_32(self->type);
}

880 881 882 883 884 885 886 887 888 889 890 891
/*
 * Look up (or create) the thread with pid 0 and name it "swapper".
 *
 * Returns the thread, or NULL after logging an error when either step
 * fails.
 */
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *idle = perf_session__findnew(self, 0);

	if (idle != NULL && !thread__set_comm(idle, "swapper"))
		return idle;

	pr_err("problem inserting idle task.\n");
	return NULL;
}

892 893 894
static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
895
	if (ops->lost == perf_event__process_lost &&
896
	    session->hists.stats.total_lost != 0) {
897 898
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

 	if (session->hists.stats.nr_invalid_chains != 0) {
 		ui__warning("Found invalid callchains!\n\n"
 			    "%u out of %u events were discarded for this reason.\n\n"
 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
 			    session->hists.stats.nr_invalid_chains,
 			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
 	}
}

921 922 923 924 925 926
/*
 * session_done() reads the session_done flag through a volatile-qualified
 * lvalue; the pipe event loop polls it after each event to decide whether
 * to keep reading.
 * NOTE(review): presumably the flag is set from outside this file (e.g. a
 * signal handler) -- confirm against the callers.
 */
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
927
	union perf_event event;
928 929 930 931 932 933 934 935 936 937
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
938
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

957
	if (size - sizeof(struct perf_event_header)) {
958
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
959 960 961 962 963
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}
964

965 966 967
			pr_err("failed to read event data\n");
			goto out_err;
		}
968 969 970
	}

	if (size == 0 ||
971
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
972
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
994
	perf_session__warn_about_errors(self, ops);
995
	perf_session_free_sample_buffers(self);
996 997 998
	return err;
}

999
/*
 * Process the events stored in a perf data file by walking them through a
 * sliding mmap window over session->fd.
 *
 * @data_offset: file offset of the first event
 * @data_size:   number of bytes of event data
 * @file_size:   size of the file; clamped to data_offset + data_size
 * @ops:         event callbacks, missing ones are filled with defaults
 *
 * The window is at most session->mmap_window bytes and always starts on a
 * page boundary; 'head' is the offset of the current event inside it. When
 * the next event does not fit in the window, the window is slid forward to
 * the page containing that event and the file is mapped again. Up to
 * ARRAY_SIZE(mmaps) mappings are kept alive in a ring and the oldest one is
 * unmapped on each slide.
 * NOTE(review): older mappings are presumably kept because queued samples
 * still point into them -- confirm against the ordered samples code.
 *
 * Returns 0 on success, -1 if the progress bar cannot be created, -errno if
 * mmap() fails, or -EINVAL if an event can never fit in the window
 * (truncated or corrupt file).
 */
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;
	bool fits;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	/* Round the start down to a page boundary, as mmap() requires. */
	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		/* Headers are byte swapped in place: use a private, writable map. */
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		/* Capture errno before pr_err() has a chance to change it. */
		err = -errno;
		pr_err("failed to mmap file\n");
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = (union perf_event *)(buf + head);

	/*
	 * Only look at the header once it is known to lie inside the window,
	 * then check that the whole event does too.
	 */
	fits = head + sizeof(event->header) <= mmap_size;
	if (fits) {
		if (session->header.needs_swap)
			perf_event_header__bswap(&event->header);
		fits = head + event->header.size <= mmap_size;
	}

	if (!fits) {
		page_offset = page_size * (head / page_size);
		if (page_offset == 0) {
			/*
			 * The event already starts in the first page of the
			 * window, so mapping again would produce the very
			 * same window and loop forever.
			 */
			pr_err("event does not fit in the mmap window\n");
			err = -EINVAL;
			goto out_err;
		}

		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	/* Refresh the progress bar roughly every 1/16th of the file. */
	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}
1111

1112 1113 1114 1115 1116 1117 1118 1119
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

1120 1121 1122 1123 1124 1125 1126
	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
1127

1128 1129 1130
	return err;
}

1131
bool perf_session__has_traces(struct perf_session *self, const char *msg)
1132 1133
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
1134 1135
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
1136 1137
	}

1138
	return true;
1139
}
1140

1141
/*
 * Allocate one ref_reloc_sym describing @symbol_name at @addr and attach it
 * to the kmap of every entry in @maps (MAP__NR_TYPES of them).
 *
 * The name is duplicated and cut at the first ']' if there is one. All maps
 * end up sharing the same ref_reloc_sym instance.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails.
 */
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	struct ref_reloc_sym *reloc = zalloc(sizeof(*reloc));
	char *close_bracket;
	enum map_type type;

	if (!reloc)
		return -ENOMEM;

	reloc->name = strdup(symbol_name);
	if (!reloc->name) {
		free(reloc);
		return -ENOMEM;
	}

	/* Keep only the part of the name that precedes a closing bracket. */
	close_bracket = strchr(reloc->name, ']');
	if (close_bracket != NULL)
		*close_bracket = '\0';

	reloc->addr = addr;

	for (type = 0; type < MAP__NR_TYPES; ++type)
		map__kmap(maps[type])->ref_reloc_sym = reloc;

	return 0;
}
1172 1173 1174 1175 1176 1177 1178

/*
 * Print the host machine's kernel DSOs, then its user DSOs, then the DSOs
 * of the session's other machines to @fp.
 *
 * Returns the sum of the values returned by the three print helpers.
 */
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	size_t printed;

	printed  = __dsos__fprintf(&self->host_machine.kernel_dsos, fp);
	printed += __dsos__fprintf(&self->host_machine.user_dsos, fp);
	printed += machines__fprintf_dsos(&self->machines, fp);

	return printed;
}
1179 1180 1181 1182 1183 1184 1185

/*
 * Print the DSO build-ids of the host machine, followed by those of the
 * session's other machines, to @fp. @with_hits is forwarded unchanged to
 * both helpers.
 *
 * Returns the sum of the values returned by the two helpers.
 */
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t printed;

	printed  = machine__fprintf_dsos_buildid(&self->host_machine, fp,
						 with_hits);
	printed += machines__fprintf_dsos_buildid(&self->machines, fp,
						  with_hits);

	return printed;
}
1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200

/*
 * Print the session-wide event counts under an "Aggregated stats:" heading,
 * then one "<event name> stats:" section for each evsel on the session's
 * evlist.
 *
 * Returns the sum of the values returned by every print call.
 */
size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed;

	printed  = fprintf(fp, "Aggregated stats:\n");
	printed += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(evsel, &session->evlist->entries, node) {
		printed += fprintf(fp, "%s stats:\n", event_name(evsel));
		printed += hists__fprintf_nr_events(&evsel->hists, fp);
	}

	return printed;
}