#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "session.h"
#include "sort.h"
#include "util.h"

static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format\n");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format\n");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

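/*
 * Size of the optional id portion (TID, TIME, ID, STREAM_ID, CPU) that the
 * kernel appends to non-sample events when attr.sample_id_all is set,
 * depending on the configured sample_type.
 */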
static void perf_session__id_header_size(struct perf_session *session)
{
	struct sample_data *data;
	u64 sample_type = session->sample_type;
	u16 size = 0;

	if (!session->sample_id_all)
		goto out;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	session->id_hdr_size = size;
}

void perf_session__set_sample_id_all(struct perf_session *session, bool value)
{
	session->sample_id_all = value;
	perf_session__id_header_size(session);
}

void perf_session__set_sample_type(struct perf_session *session, u64 type)
{
	session->sample_type = type;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
	self->sample_id_all = perf_header__sample_id_all(&self->header);
	perf_session__id_header_size(self);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
		goto out_free;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	INIT_LIST_HEAD(&self->dead_threads);
	self->hists_tree = RB_ROOT;
	self->last_match = NULL;
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	perf_session__update_sample_type(self);

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

out:
	return self;
out_free:
	free(self);
	return NULL;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
						   struct thread *thread,
						   struct ip_callchain *chain,
						   struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

	if (!syms)
		return NULL;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
			syms[i].map = al.map;
			syms[i].sym = al.sym;
		}
	}

	return syms;
}

static int process_event_synth_stub(event_t *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(event_t *event __used,
			      struct sample_data *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(event_t *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(event_t *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

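/*
 * Point every callback the tool did not provide at a "do nothing" stub so
 * the dispatch code never has to check for NULL handlers.
 */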
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

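/*
 * Byte-swap a buffer of u64 words in place, used when the perf.data file
 * was recorded on a host of the opposite endianness.
 */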
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void event__all64_swap(event_t *self)
{
	struct perf_event_header *hdr = &self->header;
	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}

static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}

static void event__mmap_swap(event_t *self)
{
	self->mmap.pid	 = bswap_32(self->mmap.pid);
	self->mmap.tid	 = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len	 = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}

static void event__task_swap(event_t *self)
{
	self->fork.pid	= bswap_32(self->fork.pid);
	self->fork.tid	= bswap_32(self->fork.tid);
	self->fork.ppid	= bswap_32(self->fork.ppid);
	self->fork.ptid	= bswap_32(self->fork.ptid);
	self->fork.time	= bswap_64(self->fork.time);
}

static void event__read_swap(event_t *self)
{
	self->read.pid		= bswap_32(self->read.pid);
	self->read.tid		= bswap_32(self->read.tid);
	self->read.value	= bswap_64(self->read.value);
	self->read.time_enabled	= bswap_64(self->read.time_enabled);
	self->read.time_running	= bswap_64(self->read.time_running);
	self->read.id		= bswap_64(self->read.id);
}

static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type		= bswap_32(self->attr.attr.type);
	self->attr.attr.size		= bswap_32(self->attr.attr.size);
	self->attr.attr.config		= bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len);

	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}

static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}

static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}

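/* Per record type byte-swap handlers, indexed by perf_event_header.type. */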
typedef void (*event__swap_op)(event_t *self);

static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]   = event__mmap_swap,
	[PERF_RECORD_COMM]   = event__comm_swap,
	[PERF_RECORD_FORK]   = event__task_swap,
	[PERF_RECORD_EXIT]   = event__task_swap,
	[PERF_RECORD_LOST]   = event__all64_swap,
	[PERF_RECORD_READ]   = event__read_swap,
	[PERF_RECORD_SAMPLE] = event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]   = event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]   = event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA]   = event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]   = NULL,
	[PERF_RECORD_HEADER_MAX]    = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	event_t			*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      event_t *event,
				      struct sample_data *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);

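/*
 * Deliver every queued event with a timestamp up to os->next_flush, in
 * timestamp order, and recycle the flushed sample_queue entries via the
 * sample_cache list.
 */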
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct sample_data sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		event__parse_sample(iter->event, s, &sample);
		perf_session_deliver_event(s, iter->event, &sample, ops,
					   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(event_t *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

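/*
 * sample_queue entries are handed out from MAX_SAMPLE_BUFFER sized chunks.
 * Full chunks are remembered on ordered_samples.to_free for later release,
 * and entries of flushed events are reused via ordered_samples.sample_cache.
 */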
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

static int perf_session_queue_event(struct perf_session *s, event_t *event,
				    struct sample_data *data, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = data->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct sample_data *sample)
{
	unsigned int i;

	printf("... chain: nr:%Lu\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       event_t *event,
				       struct sample_data *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%Lu ", sample->time);
}

static void dump_event(struct perf_session *session, event_t *event,
		       u64 file_offset, struct sample_data *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
	       event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
	       event__get_event_name(event->header.type));
}

static void dump_sample(struct perf_session *session, event_t *event,
			struct sample_data *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
	       sample->pid, sample->tid, sample->ip, sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

static int perf_session_deliver_event(struct perf_session *session,
				      event_t *event,
				      struct sample_data *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		return ops->sample(event, sample, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   event_t *event, struct sample_data *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, event_t *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

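/*
 * Central dispatch for one event: byte-swap it if needed, account it in the
 * stats, handle user/synthetic records immediately and parse + queue (or
 * deliver) the kernel records.
 */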
static int perf_session__process_event(struct perf_session *session,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct sample_data sample;
	int ret;

	if (session->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	event__parse_sample(event, session, &sample);

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == event__process_lost &&
	    session->hists.stats.total_lost != 0) {
		ui__warning("Processed %Lu events and LOST %Lu!\n\n"
			    "Check IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

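/*
 * Pipe input (self->fd_pipe) cannot be mmapped, so read one event at a
 * time, header first and then the payload, until the stream ends.
 */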
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

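/*
 * Walk the on-disk event stream by mmapping the file in mmap_window sized
 * slices; when an event would cross the end of the current slice, remap
 * starting at the page containing it.
 */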
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	event_t *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = (event_t *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);
	size = event->header.size;
	if (size == 0)
		size = 8;

	if (head + event->header.size > mmap_size) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}