#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"

static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

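/*
 * Compute the size of the sample_id_all trailer appended to
 * non-sample events: one field per selected PERF_SAMPLE_* bit,
 * laid out as at the end of a PERF_RECORD_SAMPLE.
 */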
static void perf_session__id_header_size(struct perf_session *session)
{
	struct perf_sample *data;
	u64 sample_type = session->sample_type;
	u16 size = 0;

	if (!session->sample_id_all)
		goto out;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	session->id_hdr_size = size;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_evlist__sample_type(self->evlist);
	self->sample_size = perf_sample_size(self->sample_type);
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
	perf_session__id_header_size(self);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	INIT_LIST_HEAD(&self->dead_threads);
	self->last_match = NULL;
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__update_sample_type(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}
void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

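/*
 * Walk a raw PERF_SAMPLE_CALLCHAIN. Entries >= PERF_CONTEXT_MAX are
 * context markers that switch the cpumode used to resolve the
 * addresses that follow; every other entry is an IP that gets
 * resolved and appended to the session's callchain cursor.
 */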
int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&self->callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(union perf_event *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

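/*
 * Point every handler the tool did not supply at a stub, so the
 * dispatch code can call through the ops table unconditionally.
 */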
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_sample_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = perf_event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

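/* Byte-swap an array of u64s in place; byte_size must be a multiple of 8. */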
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

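/*
 * Byte-swap handlers for reading a perf.data file recorded on a
 * machine of the opposite endianness, indexed by record type.
 */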
static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
}

static void perf_event__attr_swap(union perf_event *event)
{
	size_t size;

	event->attr.attr.type		= bswap_32(event->attr.attr.type);
	event->attr.attr.size		= bswap_32(event->attr.attr.size);
	event->attr.attr.config		= bswap_64(event->attr.attr.config);
	event->attr.attr.sample_period	= bswap_64(event->attr.attr.sample_period);
	event->attr.attr.sample_type	= bswap_64(event->attr.attr.sample_type);
	event->attr.attr.read_format	= bswap_64(event->attr.attr.read_format);
	event->attr.attr.wakeup_events	= bswap_32(event->attr.attr.wakeup_events);
	event->attr.attr.bp_type	= bswap_32(event->attr.attr.bp_type);
	event->attr.attr.bp_addr	= bswap_64(event->attr.attr.bp_addr);
	event->attr.attr.bp_len		= bswap_64(event->attr.attr.bp_len);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

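/*
 * A queued event awaiting an ordered flush: the event itself, its
 * timestamp and file offset, and its link on the time-ordered list.
 */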
struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);

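/*
 * Deliver every queued event with a timestamp at or below
 * os->next_flush, oldest first, and recycle the drained entries
 * onto the sample_cache free list.
 */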
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	int ret;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_session__parse_sample(s, iter->event, &sample);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else
			perf_session_deliver_event(s, iter->event, &sample, ops,
						   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(union perf_event *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

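/*
 * sample_queue entries are carved out of 64KB chunks: slot 0 links
 * the chunk onto os->to_free for bulk freeing, the remaining slots
 * are handed out in order, and recycled entries are taken from
 * os->sample_cache before a new chunk is allocated.
 */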
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	struct perf_evsel *evsel;

	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return ops->sample(event, sample, evsel, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	/* Bounds-check the type before using it to index swap_ops. */
	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_session__parse_sample(session, event, &sample);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == perf_event__process_lost &&
	    session->hists.stats.total_lost != 0) {
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

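/*
 * Pipe mode: the fd is not seekable, so read one perf_event_header
 * at a time, then the payload it describes, and process in stream
 * order.
 */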
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

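/*
 * File mode: mmap the data area in mmap_window-sized slices and
 * process events in place. When the next event would cross the end
 * of the current slice, remap starting at the page that contains it.
 * A cross-endian file is mapped MAP_PRIVATE with PROT_WRITE so events
 * can be byte-swapped in place without dirtying the file.
 */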
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

void perf_session__print_symbols(union perf_event *event,
				struct perf_sample *sample,
				struct perf_session *session)
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &session->callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
			event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {

		if (perf_session__resolve_callchain(session, al.thread,
						sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

			if (node->sym && node->sym->name)
				symname = node->sym->name;
			else
				symname = "";

			if (node->map && node->map->dso && node->map->dso->name)
				dsoname = node->map->dso->name;
			else
				dsoname = "";

			printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname);

			callchain_cursor_advance(cursor);
		}

	} else {
		if (al.sym && al.sym->name)
			symname = al.sym->name;
		else
			symname = "";

		if (al.map && al.map->dso && al.map->dso->name)
			dsoname = al.map->dso->name;
		else
			dsoname = "";

		printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname);
	}
}