#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"

static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -errno;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

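/*
 * Compute how many bytes of sample_id_all data (tid, time, id, stream_id,
 * cpu) trail each non-sample event, based on the session's sample_type,
 * and cache the result in session->id_hdr_size.
 */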
static void perf_session__id_header_size(struct perf_session *session)
{
	struct perf_sample *data;
	u64 sample_type = session->sample_type;
	u16 size = 0;

	if (!session->sample_id_all)
		goto out;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	session->id_hdr_size = size;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_evlist__sample_type(self->evlist);
	self->sample_size = perf_sample_size(self->sample_type);
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
	perf_session__id_header_size(self);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	INIT_LIST_HEAD(&self->dead_threads);
	self->last_match = NULL;
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__update_sample_type(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&self->callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

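/*
 * Default handlers used by perf_event_ops__fill_defaults() below for any
 * callback a tool leaves NULL: they just log the event as unhandled.
 */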
static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(union perf_event *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

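/*
 * Fill in stub handlers for every callback the tool did not provide, so
 * that event dispatch never has to check for NULL.  The finished_round
 * handler depends on whether the tool asked for time-ordered delivery.
 */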
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_sample_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = perf_event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

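/*
 * Byteswap a buffer of u64 words in place; used both for whole-event swaps
 * and for the trailing id array in perf_event__attr_swap() below.
 */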
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
}

static void perf_event__attr_swap(union perf_event *event)
{
	size_t size;

	event->attr.attr.type		= bswap_32(event->attr.attr.type);
	event->attr.attr.size		= bswap_32(event->attr.attr.size);
	event->attr.attr.config		= bswap_64(event->attr.attr.config);
	event->attr.attr.sample_period	= bswap_64(event->attr.attr.sample_period);
	event->attr.attr.sample_type	= bswap_64(event->attr.attr.sample_type);
	event->attr.attr.read_format	= bswap_64(event->attr.attr.read_format);
	event->attr.attr.wakeup_events	= bswap_32(event->attr.attr.wakeup_events);
	event->attr.attr.bp_type	= bswap_32(event->attr.attr.bp_type);
	event->attr.attr.bp_addr	= bswap_64(event->attr.attr.bp_addr);
	event->attr.attr.bp_len		= bswap_64(event->attr.attr.bp_len);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);

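/*
 * Deliver every queued event with a timestamp up to os->next_flush, in
 * timestamp order, then recycle the queue entries onto the sample_cache
 * free list so later rounds can reuse them without reallocating.
 */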
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		perf_session__parse_sample(s, iter->event, &sample);
		perf_session_deliver_event(s, iter->event, &sample, ops,
					   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(union perf_event *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

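/*
 * Queue one event for time-ordered delivery.  Entries come from the
 * sample_cache free list when possible, otherwise from the current 64KB
 * sample_buffer slab (a new slab is allocated and tracked on to_free when
 * the current one fills up).  Events without a timestamp get -ETIME so
 * the caller can fall back to immediate delivery.
 */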
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

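/*
 * Hand one event to the tool's callback.  Samples are first mapped to the
 * evsel that generated them via their sample id; everything else goes
 * straight to the matching perf_event_ops handler.
 */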
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	struct perf_evsel *evsel;

	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return ops->sample(event, sample, evsel, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

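/*
 * Synthesized "user" events (attr, event_type, tracing_data, build_id and
 * the finished_round marker) are handled as soon as they are read and
 * never go through the ordered-samples queue.
 */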
static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	perf_session__parse_sample(session, event, &sample);

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == perf_event__process_lost &&
	    session->hists.stats.total_lost != 0) {
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

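/*
 * session_done is re-read through a volatile access on every iteration of
 * the pipe-reading loop below, so processing can be stopped asynchronously.
 */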
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope of catching on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

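/*
 * Read events from an on-disk perf.data file by mmapping it in
 * mmap_window sized slices and walking the events in place, remapping
 * whenever an event would cross the end of the current slice.
 */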
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		goto remap;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope of catching on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

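/*
 * Print the resolved symbol and DSO for one sample; when callchains were
 * recorded and requested, walk the resolved callchain cursor and print
 * one line per frame instead.
 */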
void perf_session__print_symbols(union perf_event *event,
				struct perf_sample *sample,
				struct perf_session *session)
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &session->callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
			event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {

		if (perf_session__resolve_callchain(session, al.thread,
						sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

			if (node->sym && node->sym->name)
				symname = node->sym->name;
			else
				symname = "";

			if (node->map && node->map->dso && node->map->dso->name)
				dsoname = node->map->dso->name;
			else
				dsoname = "";

			printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname);

			callchain_cursor_advance(cursor);
		}

	} else {
		if (al.sym && al.sym->name)
			symname = al.sym->name;
		else
			symname = "";

		if (al.map && al.map->dso && al.map->dso->name)
			dsoname = al.map->dso->name;
		else
			dsoname = "";

		printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname);
	}
}