session.c 29.4 KB
Newer Older
1 2
#define _FILE_OFFSET_BITS 64

3 4
#include <linux/kernel.h>

5
#include <byteswap.h>
6 7
#include <unistd.h>
#include <sys/types.h>
8
#include <sys/mman.h>
9 10

#include "session.h"
11
#include "sort.h"
12 13 14 15 16 17
#include "util.h"

/*
 * Open the input named by self->filename and read the perf header from it.
 *
 * A filename of "-" selects pipe mode: self->fd becomes STDIN_FILENO,
 * self->fd_pipe is set, and 0 is returned even when the header cannot be
 * read (only an error message is printed).
 *
 * For regular files the file must be non-empty and, unless @force is set,
 * owned by root or by the effective user.
 *
 * Returns 0 on success, -<errno of open()> when the file cannot be opened,
 * and -1 for every other failure (the descriptor is closed and self->fd is
 * reset to -1 in that case).
 */
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		/*
		 * Return the saved value: the pr_err() calls above may have
		 * modified errno since open() failed.
		 */
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

68
static void perf_session__id_header_size(struct perf_session *session)
69
{
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
       struct sample_data *data;
       u64 sample_type = session->sample_type;
       u16 size = 0;

	if (!session->sample_id_all)
		goto out;

       if (sample_type & PERF_SAMPLE_TID)
               size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
               size += sizeof(data->time);

       if (sample_type & PERF_SAMPLE_ID)
               size += sizeof(data->id);

       if (sample_type & PERF_SAMPLE_STREAM_ID)
               size += sizeof(data->stream_id);

       if (sample_type & PERF_SAMPLE_CPU)
               size += sizeof(data->cpu) * 2;
out:
       session->id_hdr_size = size;
}

/*
 * Store the sample_id_all flag on the session and refresh the cached
 * id header size, which depends on it.
 */
void perf_session__set_sample_id_all(struct perf_session *session, bool value)
{
	session->sample_id_all = value;
	perf_session__id_header_size(session);
}

101 102 103 104 105
/*
 * Store @type as the session's sample type.
 *
 * NOTE(review): unlike perf_session__set_sample_id_all() and
 * perf_session__update_sample_type(), this does not recompute
 * session->id_hdr_size -- confirm that callers do not rely on it being
 * refreshed here.
 */
void perf_session__set_sample_type(struct perf_session *session, u64 type)
{
	session->sample_type = type;
}

106 107 108 109 110 111 112
/*
 * Reload sample_type and sample_id_all from the session header and
 * recompute the id header size derived from them.
 */
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
	self->sample_id_all = perf_header__sample_id_all(&self->header);
	perf_session__id_header_size(self);
}

113 114
int perf_session__create_kernel_maps(struct perf_session *self)
{
115
	int ret = machine__create_kernel_maps(&self->host_machine);
116 117

	if (ret >= 0)
118
		ret = machines__create_guest_kernel_maps(&self->machines);
119 120 121
	return ret;
}

122 123 124 125 126 127
/* Tear down the kernel maps of the host machine, then of the guests. */
static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

128 129 130
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
131
{
132
	size_t len = filename ? strlen(filename) + 1 : 0;
133 134 135 136 137 138
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
139
		goto out_free;
140 141

	memcpy(self->filename, filename, len);
142
	self->threads = RB_ROOT;
143
	INIT_LIST_HEAD(&self->dead_threads);
144
	self->hists_tree = RB_ROOT;
145
	self->last_match = NULL;
146 147 148 149 150 151 152 153 154
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
155
	self->machines = RB_ROOT;
T
Tom Zanussi 已提交
156
	self->repipe = repipe;
157
	INIT_LIST_HEAD(&self->ordered_samples.samples);
158
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
159
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
160
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
161

162 163 164 165 166 167 168 169 170 171 172
	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}
173

174
	perf_session__update_sample_type(self);
175 176 177 178 179 180 181

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

182 183
out:
	return self;
184
out_free:
185 186
	free(self);
	return NULL;
187 188 189
out_delete:
	perf_session__delete(self);
	return NULL;
190 191
}

192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
/* Unlink and delete every thread parked on the session's dead_threads list. */
static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *pos, *next;

	/* The _safe variant is required because entries are removed mid-walk. */
	list_for_each_entry_safe(pos, next, &self->dead_threads, node) {
		list_del(&pos->node);
		thread__delete(pos);
	}
}

/* Remove every thread from the session's threads rbtree and delete it. */
static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		/*
		 * Fetch the successor while the node is still linked into
		 * the tree: walking from a node that has already been erased
		 * is not a valid rbtree operation.
		 */
		nd = rb_next(nd);
		rb_erase(&t->rb_node, &self->threads);
		thread__delete(t);
	}
}

215 216 217
/*
 * Release everything owned by the session: header state, kernel maps, dead
 * and live threads, the host machine, the file descriptor and finally the
 * session structure itself.
 */
void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	perf_session__destroy_kernel_maps(self);

	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);

	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}
225

226 227
void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
228
	self->last_match = NULL;
229 230 231 232 233 234 235 236
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

237 238 239 240 241 242 243 244
/* True when @sym has a name and that name matches parent_regex. */
static bool symbol__match_parent_regex(struct symbol *sym)
{
	return sym->name && regexec(&parent_regex, sym->name, 0, NULL, 0) == 0;
}

245 246 247 248
int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
249 250 251
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
252
	int err;
253

254
	callchain_cursor_reset(&self->callchain_cursor);
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

274
		al.filtered = false;
275
		thread__find_addr_location(thread, self, cpumode,
276
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
277 278 279 280
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
281
			if (!symbol_conf.use_callchain)
282 283
				break;
		}
284 285 286 287 288

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
289 290
	}

291
	return 0;
292
}
293

294 295 296 297 298 299 300
/*
 * Default handler for the (event, session) callbacks: logs that the event
 * is unhandled (via dump_printf) and reports success.
 */
static int process_event_synth_stub(event_t *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

301
static int process_event_stub(event_t *event __used,
302
			      struct sample_data *sample __used,
303 304 305 306 307 308
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

309 310 311 312 313 314 315 316 317 318 319 320
/*
 * finished_round handler installed when samples are not being ordered:
 * logs that the event is unhandled (via dump_printf) and reports success.
 */
static int process_finished_round_stub(event_t *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

/* Defined further down; declared here for perf_event_ops__fill_defaults(). */
static int process_finished_round(event_t *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

321 322
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
323 324 325 326 327 328 329 330 331 332 333
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
334
		handler->lost = event__process_lost;
335 336 337 338 339 340
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
341
	if (handler->attr == NULL)
342
		handler->attr = process_event_synth_stub;
343
	if (handler->event_type == NULL)
344
		handler->event_type = process_event_synth_stub;
345
	if (handler->tracing_data == NULL)
346
		handler->tracing_data = process_event_synth_stub;
347
	if (handler->build_id == NULL)
348
		handler->build_id = process_event_synth_stub;
349 350 351 352 353 354
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
355 356
}

357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
/*
 * Byte-swap, in place, consecutive 64-bit words starting at @src until
 * @byte_size bytes have been covered.
 *
 * Whole words are always swapped, so a @byte_size that is not a multiple of
 * sizeof(u64) still causes the final partial word to be swapped in full.
 */
void mem_bswap_64(void *src, int byte_size)
{
	u64 *word;

	for (word = src; byte_size > 0; byte_size -= sizeof(u64), ++word)
		*word = bswap_64(*word);
}

/*
 * Swap an event whose payload is treated as a plain run of 64-bit words:
 * everything after the header, up to header.size, is swapped word by word.
 */
static void event__all64_swap(event_t *self)
{
	void *payload = &self->header + 1;

	mem_bswap_64(payload, self->header.size - sizeof(self->header));
}

/* Byte-swap the 32-bit pid and tid fields of a comm event in place. */
static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}

/*
 * Byte-swap the numeric fields of an mmap event in place: 32-bit pid/tid
 * and 64-bit start/len/pgoff.
 */
static void event__mmap_swap(event_t *self)
{
	self->mmap.pid	 = bswap_32(self->mmap.pid);
	self->mmap.tid	 = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len	 = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}

/*
 * Byte-swap a task event in place through its fork view: 32-bit
 * pid/tid/ppid/ptid and the 64-bit time. The swap table uses this for both
 * PERF_RECORD_FORK and PERF_RECORD_EXIT.
 */
static void event__task_swap(event_t *self)
{
	self->fork.pid	= bswap_32(self->fork.pid);
	self->fork.tid	= bswap_32(self->fork.tid);
	self->fork.ppid	= bswap_32(self->fork.ppid);
	self->fork.ptid	= bswap_32(self->fork.ptid);
	self->fork.time	= bswap_64(self->fork.time);
}

/*
 * Byte-swap a read event in place: 32-bit pid/tid and the 64-bit value,
 * time_enabled, time_running and id fields.
 */
static void event__read_swap(event_t *self)
{
	self->read.pid		= bswap_32(self->read.pid);
	self->read.tid		= bswap_32(self->read.tid);
	self->read.value	= bswap_64(self->read.value);
	self->read.time_enabled	= bswap_64(self->read.time_enabled);
	self->read.time_running	= bswap_64(self->read.time_running);
	self->read.id		= bswap_64(self->read.id);
}

408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427
/*
 * Byte-swap an attr event in place: first the individually listed fields of
 * the embedded attr, then everything from the id array up to header.size as
 * a run of 64-bit words.
 *
 * NOTE(review): only the attr fields listed below are swapped -- confirm
 * that no other multi-byte attr field needs handling.
 */
static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type		= bswap_32(self->attr.attr.type);
	self->attr.attr.size		= bswap_32(self->attr.attr.size);
	self->attr.attr.config		= bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len);

	/* Bytes remaining in the event once the id array starts. */
	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}

428 429 430 431 432 433
/* Byte-swap the 64-bit event_id of an event_type event in place. */
static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}

434 435 436 437 438
/* Byte-swap the 32-bit size field of a tracing_data event in place. */
static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}

439 440 441 442 443 444 445 446 447 448
/* In-place byte-swap routine for one event. */
typedef void (*event__swap_op)(event_t *self);

/*
 * Swap routine per record type, indexed by header.type. A NULL entry means
 * nothing is swapped for that type. The PERF_RECORD_HEADER_MAX entry makes
 * the table cover every index up to and including that value.
 */
static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]			= event__mmap_swap,
	[PERF_RECORD_COMM]			= event__comm_swap,
	[PERF_RECORD_FORK]			= event__task_swap,
	[PERF_RECORD_EXIT]			= event__task_swap,
	[PERF_RECORD_LOST]			= event__all64_swap,
	[PERF_RECORD_READ]			= event__read_swap,
	[PERF_RECORD_SAMPLE]			= event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]		= event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]		= event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA]	= event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]		= NULL,
	[PERF_RECORD_HEADER_MAX]		= NULL,
};

456 457
struct sample_queue {
	u64			timestamp;
458
	u64			file_offset;
459
	event_t			*event;
460 461 462
	struct list_head	list;
};

463 464 465 466
static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

467
	while (!list_empty(&os->to_free)) {
468 469
		struct sample_queue *sq;

470
		sq = list_entry(os->to_free.next, struct sample_queue, list);
471 472 473 474 475
		list_del(&sq->list);
		free(sq);
	}
}

476 477 478
/* Defined further down; declared here for flush_sample_queue(). */
static int perf_session_deliver_event(struct perf_session *session,
				      event_t *event, struct sample_data *sample,
				      struct perf_event_ops *ops, u64 file_offset);
481

482 483 484
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
485 486
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
487
	struct sample_queue *tmp, *iter;
488
	struct sample_data sample;
489 490
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
491

492
	if (!ops->ordered_samples || !limit)
493 494 495 496
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
497
			break;
498

499
		event__parse_sample(iter->event, s, &sample);
500 501
		perf_session_deliver_event(s, iter->event, &sample, ops,
					   iter->file_offset);
502

503
		os->last_flush = iter->timestamp;
504
		list_del(&iter->list);
505
		list_add(&iter->list, &os->sample_cache);
506
	}
507 508 509 510 511 512 513

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
514 515
}

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
/*
 * When perf record finishes a pass over every buffer, it records this
 * pseudo event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(event_t *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	/* Flush up to the limit set by the previous round, then advance it. */
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

565
/* The queue is ordered by time */
566
static void __queue_event(struct sample_queue *new, struct perf_session *s)
567
{
568 569 570 571
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;
572

573
	os->last_sample = new;
574

575 576 577
	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
578 579 580 581
		return;
	}

	/*
582 583 584
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
585
	 */
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
608 609
}

610 611
/* Number of struct sample_queue entries that fit in 64 KiB. */
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

612
/*
 * Queue @event for ordered delivery, keyed by data->time.
 *
 * Returns 0 on success, -ETIME when the sample carries no usable timestamp
 * (0 or ~0ULL), -EINVAL when the timestamp is older than the last flushed
 * one, and -ENOMEM when a new entry buffer cannot be allocated.
 */
static int perf_session_queue_event(struct perf_session *s, event_t *event,
				    struct sample_data *data, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *cache = &os->sample_cache;
	struct sample_queue *new;
	u64 timestamp = data->time;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < os->last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(cache)) {
		/* Reuse an entry recycled by an earlier flush. */
		new = list_entry(cache->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		/* Take the next free slot of the current buffer. */
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		/*
		 * Slot 0 only serves as the to_free list node for the whole
		 * buffer; slot 1 is handed out now and slot 2 comes next.
		 */
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}
652

653
static void callchain__printf(struct sample_data *sample)
654 655
{
	unsigned int i;
656

657
	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);
658 659

	for (i = 0; i < sample->callchain->nr; i++)
660 661
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
662 663
}

664 665 666 667 668 669 670 671 672 673 674 675 676 677
/*
 * Print the cpu and/or time of @sample, depending on which of
 * PERF_SAMPLE_CPU / PERF_SAMPLE_TIME the session records.
 *
 * For non-sample events in a session without sample_id_all, "-1 -1 " is
 * printed instead.
 */
static void perf_session__print_tstamp(struct perf_session *session,
				       event_t *event,
				       struct sample_data *sample)
{
	bool has_ids = event->header.type == PERF_RECORD_SAMPLE ||
		       session->sample_id_all;

	if (!has_ids) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if (session->sample_type & PERF_SAMPLE_CPU)
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

681 682 683 684 685 686
/*
 * When dump_trace is enabled, print the raw header line for @event, its
 * trace_event() dump, the optional cpu/time prefix (when @sample is given),
 * and the "PERF_RECORD_<name>" line. No newline is emitted after the last
 * line.
 */
static void dump_event(struct perf_session *session, event_t *event,
		       u64 file_offset, struct sample_data *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample != NULL)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, event__get_event_name(event->header.type));
}

/*
 * When dump_trace is enabled, print misc, pid/tid, ip and period of @sample,
 * followed by its callchain if the session records callchains.
 */
static void dump_sample(struct perf_session *session, event_t *event,
			struct sample_data *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return;

	callchain__printf(sample);
}

713 714 715
static int perf_session_deliver_event(struct perf_session *session,
				      event_t *event,
				      struct sample_data *sample,
716
				      struct perf_event_ops *ops,
717
				      u64 file_offset)
718
{
719 720
	dump_event(session, event, file_offset, sample);

721 722
	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
723
		dump_sample(session, event, sample);
724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746
		return ops->sample(event, sample, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
/*
 * Validate the callchain of a sample before it is processed any further.
 *
 * Only PERF_RECORD_SAMPLE events in sessions that record callchains are
 * checked. An invalid chain is accounted in the hists stats and reported
 * with -EINVAL; everything else returns 0.
 */
static int perf_session__preprocess_sample(struct perf_session *session,
					   event_t *event, struct sample_data *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE)
		return 0;

	if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (ip_callchain__valid(sample->callchain, event))
		return 0;

	pr_debug("call-chain problem with event, skipping it.\n");
	++session->hists.stats.nr_invalid_chains;
	session->hists.stats.total_invalid_chains += sample->period;
	return -EINVAL;
}

763 764
/*
 * Dispatch a user-space generated record straight to its callback in @ops;
 * these records are never queued for ordering.
 *
 * Returns the callback's result, or -EINVAL for a type with no callback
 * here.
 */
static int perf_session__process_user_event(struct perf_session *session, event_t *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);

	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);

	case PERF_RECORD_HEADER_TRACING_DATA:
		/* Position the fd at this event so the handler can read from it. */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);

	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);

	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);

	default:
		break;
	}

	return -EINVAL;
}

/*
 * Process one event whose header is already in host byte order.
 *
 * The record type is validated, the payload is byte-swapped when the file
 * needs it, and the event is then either dispatched as a user event, queued
 * for ordered delivery, or delivered immediately.
 *
 * Returns -EINVAL for an out-of-range record type, 0 when a sample is
 * dropped because of an invalid callchain, and otherwise the result of the
 * queueing or delivery step.
 */
static int perf_session__process_event(struct perf_session *session,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct sample_data sample;
	int ret;

	/*
	 * Range-check the type before it is used as an index into
	 * event__swap_ops[]; a corrupt type would otherwise read past the
	 * end of the table.
	 */
	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	event__parse_sample(event, session, &sample);

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		/* -ETIME: no usable timestamp, so deliver right away instead. */
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

826 827 828 829 830 831 832
/* Byte-swap the three fields of an event header in place. */
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->size = bswap_16(self->size);
	self->misc = bswap_16(self->misc);
	self->type = bswap_32(self->type);
}

833 834 835 836 837 838 839 840 841 842 843 844
/*
 * Look up (or create) the thread with pid 0 and name it "swapper".
 * Returns the thread, or NULL after logging an error if either step fails.
 */
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *idle = perf_session__findnew(self, 0);

	if (idle != NULL && !thread__set_comm(idle, "swapper"))
		return idle;

	pr_err("problem inserting idle task.\n");
	return NULL;
}

845 846 847 848 849
/*
 * Emit end-of-run warnings based on the statistics gathered in
 * session->hists.stats: lost events (only when the default lost handler was
 * in use), unknown record types, and samples dropped for invalid callchains.
 */
static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == event__process_lost &&
	    session->hists.stats.total_lost) {
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_invalid_chains) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890
/*
 * Stop flag polled by the pipe processing loop: once it reads non-zero, no
 * further events are fetched. The macro reads the variable through a
 * volatile-qualified lvalue.
 * NOTE(review): presumably set from a signal handler elsewhere -- confirm.
 */
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
891
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

910
	if (size - sizeof(struct perf_event_header)) {
911
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
912 913 914 915 916
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}
917

918 919 920
			pr_err("failed to read event data\n");
			goto out_err;
		}
921 922 923
	}

	if (size == 0 ||
924
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
925
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
947
	perf_session__warn_about_errors(self, ops);
948
	perf_session_free_sample_buffers(self);
949 950 951
	return err;
}

952
/*
 * Process the events stored in the session's file by mmap()ing it in
 * windows of at most session->mmap_window bytes.
 *
 * @data_offset: file offset of the first event.
 * @data_size:   number of bytes of event data.
 * @file_size:   size of the file; processing stops at
 *               min(file_size, data_offset + data_size).
 *
 * When an event would extend past the current window, a new window is
 * mapped starting at the page that contains the event. Up to
 * ARRAY_SIZE(mmaps) windows are remembered, and the slot about to be reused
 * is unmapped first.
 *
 * Returns 0 on success, -1 if the progress indicator cannot be created, or
 * -errno from a failed mmap().
 */
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	event_t *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	/* mmap offsets must be page aligned; 'head' is the offset within it. */
	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	/* Events are byte-swapped in place, which needs a writable private map. */
	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		/* Capture errno before pr_err() gets a chance to change it. */
		err = -errno;
		pr_err("failed to mmap file\n");
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = (event_t *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}
1064

1065 1066 1067 1068 1069 1070 1071 1072
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

1073 1074 1075 1076 1077 1078 1079
	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
1080

1081 1082 1083
	return err;
}

1084
bool perf_session__has_traces(struct perf_session *self, const char *msg)
1085 1086
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
1087 1088
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
1089 1090
	}

1091
	return true;
1092
}
1093

1094
/*
 * Allocate a ref_reloc_sym for @symbol_name at @addr and attach it to the
 * kmap of every entry in @maps (MAP__NR_TYPES entries).
 *
 * The stored name is a copy of @symbol_name, cut at the first ']' if there
 * is one. The same ref_reloc_sym object is shared by all maps.
 *
 * Returns 0 on success or -ENOMEM when an allocation fails.
 */
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	struct ref_reloc_sym *reloc = zalloc(sizeof(*reloc));
	enum map_type type;
	char *close_bracket;

	if (reloc == NULL)
		return -ENOMEM;

	reloc->name = strdup(symbol_name);
	if (reloc->name == NULL) {
		free(reloc);
		return -ENOMEM;
	}

	close_bracket = strchr(reloc->name, ']');
	if (close_bracket != NULL)
		*close_bracket = '\0';

	reloc->addr = addr;

	for (type = 0; type < MAP__NR_TYPES; ++type)
		map__kmap(maps[type])->ref_reloc_sym = reloc;

	return 0;
}
1125 1126 1127 1128 1129 1130 1131

/*
 * Print the host kernel dsos, the host user dsos and then the guest
 * machines' dsos to @fp. Returns the sum of the three printers' results.
 *
 * The calls are separate statements so the output order is fixed; as
 * operands of a single '+' expression their evaluation order would be
 * unspecified.
 */
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	size_t printed;

	printed  = __dsos__fprintf(&self->host_machine.kernel_dsos, fp);
	printed += __dsos__fprintf(&self->host_machine.user_dsos, fp);
	printed += machines__fprintf_dsos(&self->machines, fp);

	return printed;
}
1132 1133 1134 1135 1136 1137 1138

/*
 * Print the dsos with their build-ids for the host machine, then for the
 * guest machines, to @fp. @with_hits is passed through to both printers.
 * Returns the sum of the two printers' results.
 */
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t printed;

	printed = machine__fprintf_dsos_buildid(&self->host_machine, fp,
						with_hits);
	printed += machines__fprintf_dsos_buildid(&self->machines, fp,
						  with_hits);
	return printed;
}