session.c 33.0 KB
Newer Older
1 2
#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"

/*
 * Open the session's data source and validate it.
 *
 * A filename of "-" selects pipe mode (read from stdin); otherwise the
 * file is opened, ownership- and size-checked, and its header parsed.
 *
 * Returns 0 on success, a negative value on error.
 */
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		/*
		 * Propagate header failures: returning 0 here would let
		 * callers process a stream we could not even parse.
		 */
		if (perf_session__read_header(self, self->fd) < 0) {
			pr_err("incompatible file format");
			return -1;
		}

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		/* use the saved errno: pr_err() may have clobbered it */
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	/* Refuse other users' files unless --force was given. */
	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_type(self->evlist)) {
		pr_err("non matching sample_type");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
		pr_err("non matching sample_id_all");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

80
static void perf_session__id_header_size(struct perf_session *session)
81
{
82
       struct perf_sample *data;
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
       u64 sample_type = session->sample_type;
       u16 size = 0;

	if (!session->sample_id_all)
		goto out;

       if (sample_type & PERF_SAMPLE_TID)
               size += sizeof(data->tid) * 2;

       if (sample_type & PERF_SAMPLE_TIME)
               size += sizeof(data->time);

       if (sample_type & PERF_SAMPLE_ID)
               size += sizeof(data->id);

       if (sample_type & PERF_SAMPLE_STREAM_ID)
               size += sizeof(data->stream_id);

       if (sample_type & PERF_SAMPLE_CPU)
               size += sizeof(data->cpu) * 2;
out:
       session->id_hdr_size = size;
}

void perf_session__update_sample_type(struct perf_session *self)
{
109
	self->sample_type = perf_evlist__sample_type(self->evlist);
110
	self->sample_size = __perf_evsel__sample_size(self->sample_type);
111
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
112 113 114
	perf_session__id_header_size(self);
}

115 116
int perf_session__create_kernel_maps(struct perf_session *self)
{
117
	int ret = machine__create_kernel_maps(&self->host_machine);
118 119

	if (ret >= 0)
120
		ret = machines__create_guest_kernel_maps(&self->machines);
121 122 123
	return ret;
}

124 125 126 127 128 129
/* Tear down the host and guest kernel maps built by
 * perf_session__create_kernel_maps(). */
static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

130 131 132
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
133
{
134
	size_t len = filename ? strlen(filename) + 1 : 0;
135 136 137 138 139 140
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
141
	self->threads = RB_ROOT;
142
	INIT_LIST_HEAD(&self->dead_threads);
143
	self->last_match = NULL;
144 145 146 147 148 149 150 151 152
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
153
	self->machines = RB_ROOT;
T
Tom Zanussi 已提交
154
	self->repipe = repipe;
155
	INIT_LIST_HEAD(&self->ordered_samples.samples);
156
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
157
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
158
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);
159

160 161 162
	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
163
		perf_session__update_sample_type(self);
164 165 166
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
167
		 * kernel MMAP event, in perf_event__process_mmap().
168 169 170 171
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}
172

173 174 175 176 177 178
	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

179 180
out:
	return self;
181 182 183
out_delete:
	perf_session__delete(self);
	return NULL;
184 185
}

186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
/* Free the threads parked on dead_threads by perf_session__remove_thread(). */
static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *pos, *tmp;

	list_for_each_entry_safe(pos, tmp, &self->dead_threads, node) {
		list_del(&pos->node);
		thread__delete(pos);
	}
}

/* Free every thread still linked into the session's thread rb-tree. */
static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		/*
		 * Advance BEFORE erasing: rb_erase() may rewrite the
		 * erased node's links while rebalancing, so rb_next()
		 * on an already-erased node can walk a stale tree.
		 */
		nd = rb_next(nd);
		rb_erase(&t->rb_node, &self->threads);
		thread__delete(t);
	}
}

209 210
/*
 * Release everything owned by @self, then the session itself.  The
 * teardown order matters: maps first, then threads (dead list before
 * the rb-tree), then the host machine, and finally the data file fd.
 */
void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}
218

219 220
/*
 * Detach @th from the session's thread rb-tree without freeing it.
 * The cached last_match is invalidated since it may point at @th.
 */
void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

230 231 232 233 234 235 236 237
/* True when @sym has a name matching the user-supplied parent regex. */
static bool symbol__match_parent_regex(struct symbol *sym)
{
	return sym->name && regexec(&parent_regex, sym->name, 0, NULL, 0) == 0;
}

238 239 240 241
/*
 * Walk a raw kernel callchain and resolve each ip to a map/symbol,
 * appending the results to the session's callchain cursor.
 *
 * PERF_CONTEXT_* marker entries do not name code addresses; they switch
 * the cpumode used to resolve the entries that follow them.
 *
 * If sort__has_parent is set, *parent receives the first symbol that
 * matches the parent regex.  Returns 0 on success or the error from
 * callchain_cursor_append().
 */
int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&self->callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		/* context markers live above PERF_CONTEXT_MAX */
		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			/* without --call-graph only the parent matters */
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}
286

287
/* Default no-op handler for synthetic (header) events. */
static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

294 295 296 297 298 299 300 301 302
/* Default no-op handler for PERF_RECORD_SAMPLE events. */
static int process_event_sample_stub(union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

303
/* Default no-op handler for ordinary kernel events (mmap, comm, ...). */
static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

311
/* Default no-op handler for PERF_RECORD_FINISHED_ROUND events. */
static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

319
static int process_finished_round(union perf_event *event,
320 321 322
				  struct perf_session *session,
				  struct perf_event_ops *ops);

323 324
/*
 * Fill every unset callback in @handler with a sensible default so the
 * dispatch code never has to NULL-check.  Most defaults are no-op
 * stubs; lost events get real accounting, and finished_round depends
 * on whether ordered sample processing was requested.
 */
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_sample_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = perf_event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

359 360 361 362 363 364 365 366 367 368 369
/*
 * Byte-swap an array of 64-bit words in place.
 *
 * @src:       start of the region to swap
 * @byte_size: size of the region in bytes; callers pass a multiple of
 *             8.  Any trailing partial word is left untouched rather
 *             than being read past the end of the buffer, which the
 *             old "byte_size > 0" condition allowed.
 */
void mem_bswap_64(void *src, int byte_size)
{
	uint64_t *m = src;

	while (byte_size >= (int)sizeof(*m)) {
		*m = bswap_64(*m);
		byte_size -= sizeof(*m);
		++m;
	}
}

370
/* Swap every u64 in the payload after the (already swapped) header. */
static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

/* Swap the fixed fields of a PERF_RECORD_COMM event. */
static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

/* Swap the fixed fields of a PERF_RECORD_MMAP event. */
static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

/* Swap a PERF_RECORD_FORK or PERF_RECORD_EXIT event (same layout). */
static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

/* Swap the fields of a PERF_RECORD_READ event. */
static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
}

/*
 * Swap a PERF_RECORD_HEADER_ATTR event: the perf_event_attr fields,
 * then the trailing variable-length id array as raw u64s.
 */
static void perf_event__attr_swap(union perf_event *event)
{
	size_t size;

	event->attr.attr.type		= bswap_32(event->attr.attr.type);
	event->attr.attr.size		= bswap_32(event->attr.attr.size);
	event->attr.attr.config		= bswap_64(event->attr.attr.config);
	event->attr.attr.sample_period	= bswap_64(event->attr.attr.sample_period);
	event->attr.attr.sample_type	= bswap_64(event->attr.attr.sample_type);
	event->attr.attr.read_format	= bswap_64(event->attr.attr.read_format);
	event->attr.attr.wakeup_events	= bswap_32(event->attr.attr.wakeup_events);
	event->attr.attr.bp_type	= bswap_32(event->attr.attr.bp_type);
	event->attr.attr.bp_addr	= bswap_64(event->attr.attr.bp_addr);
	event->attr.attr.bp_len		= bswap_64(event->attr.attr.bp_len);

	/* whatever remains after the attr struct is the id array */
	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

/* Swap a PERF_RECORD_HEADER_EVENT_TYPE event. */
static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

/* Swap a PERF_RECORD_HEADER_TRACING_DATA event. */
static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

441
typedef void (*perf_event__swap_op)(union perf_event *event);

/*
 * Per-record-type byte-swap handlers, indexed by event->header.type.
 * NULL entries need no swapping beyond the common header.
 */
static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

458 459
/* One buffered event awaiting ordered (timestamp-sorted) delivery. */
struct sample_queue {
	u64			timestamp;	/* sample time, sort key */
	u64			file_offset;	/* where the event sits in the file */
	union perf_event	*event;
	struct list_head	list;		/* on samples/sample_cache/to_free */
};

465 466 467 468
static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

469
	while (!list_empty(&os->to_free)) {
470 471
		struct sample_queue *sq;

472
		sq = list_entry(os->to_free.next, struct sample_queue, list);
473 474 475 476 477
		list_del(&sq->list);
		free(sq);
	}
}

478
static int perf_session_deliver_event(struct perf_session *session,
479
				      union perf_event *event,
480
				      struct perf_sample *sample,
481 482
				      struct perf_event_ops *ops,
				      u64 file_offset);
483

484 485 486
/*
 * Deliver every queued event with timestamp <= os->next_flush, in
 * order, then recycle the queue nodes onto the sample_cache free list.
 * last_sample (the insertion hint for __queue_event) is fixed up when
 * the flush emptied the queue or consumed the hinted node.
 */
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	int ret;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_session__parse_sample(s, iter->event, &sample);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else
			perf_session_deliver_event(s, iter->event, &sample, ops,
						   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		/* recycle the node instead of freeing it */
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		/* the old hint was flushed; point at the new tail */
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
/*
 * When perf record finishes a pass on every buffers, it records this pseudo
 * event.
 * We record the max timestamp t found in the pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will be all
 * available and then read in the pass n + 1.
 * Hence when we start to read the pass n + 2, we can safely flush every
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every events below timestamp 7
 *      etc...
 */
561
/*
 * Handle PERF_RECORD_FINISHED_ROUND: flush everything up to the
 * previous round's max timestamp, then arm the next flush point with
 * this round's max (see the pass diagram above).
 */
static int process_finished_round(union perf_event *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

571
/* The queue is ordered by time */
572
/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	/* empty queue: new becomes both head and max */
	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		/* scan forward from the hint for the insertion point */
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		/* scan backward from the hint */
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

616 617
#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

618
/*
 * Queue an event for ordered delivery.  Returns -ETIME when the event
 * carries no usable timestamp (caller then delivers it immediately),
 * -EINVAL for a timestamp older than the last flush, -ENOMEM on
 * allocation failure, 0 on success.
 *
 * Queue nodes come from, in order of preference: the recycle cache,
 * the current bulk buffer, or a freshly malloc'ed bulk buffer whose
 * slot 0 is sacrificed as the to_free list link.
 */
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		/* reuse a node recycled by flush_sample_queue() */
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		/* entry 0 links the whole buffer onto to_free */
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}
658

659
static void callchain__printf(struct perf_sample *sample)
660 661
{
	unsigned int i;
662

663
	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);
664 665

	for (i = 0; i < sample->callchain->nr; i++)
666 667
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
668 669
}

670
/*
 * Print the "cpu time " prefix used by the -D dump output.  Non-sample
 * events only carry that data when sample_id_all is set; otherwise
 * print placeholder "-1 -1 ".
 */
static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

687
/*
 * When -D/--dump-raw-trace is active, print the event's file offset,
 * size, type, raw trace payload, timestamp prefix, and record name.
 * No-op otherwise.
 */
static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

705
/*
 * Print the parsed fields of a PERF_RECORD_SAMPLE (and its callchain
 * if sampled) when -D/--dump-raw-trace is active.
 */
static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

719
/*
 * Dispatch one kernel event to the matching ops callback.  Samples are
 * additionally mapped to their evsel via the sample id; unknown ids
 * and unknown record types are counted and rejected with -1.
 */
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	struct perf_evsel *evsel;

	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return ops->sample(event, sample, evsel, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

760
/*
 * Sanity-check a sample's callchain before it is processed.  Samples
 * with corrupt chains are counted in the session stats and rejected
 * with -EINVAL; everything else passes with 0.
 */
static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

776
/*
 * Dispatch a user-space synthesized event (type >=
 * PERF_RECORD_USER_TYPE_START).  These carry no sample data and are
 * never queued for reordering.
 */
static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

/*
 * Process one event: byte-swap it if needed, account it, route user
 * events straight to their handlers, and parse/queue/deliver kernel
 * events depending on whether ordered processing is active.
 */
static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	/*
	 * Validate the type BEFORE using it to index the swap table:
	 * perf_event__swap_ops[] only covers up to PERF_RECORD_HEADER_MAX,
	 * so an unchecked type from a corrupt file would read out of
	 * bounds.  The header itself is already in host byte order here
	 * (swapped by the caller's read loop).
	 */
	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_session__parse_sample(session, event, &sample);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		/* -ETIME means "no timestamp": deliver immediately below */
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

842 843 844 845 846 847 848
/* Byte-swap the common perf event header fields in place. */
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->size = bswap_16(self->size);
	self->misc = bswap_16(self->misc);
	self->type = bswap_32(self->type);
}

849 850 851 852 853 854 855 856 857 858 859 860
/* Ensure pid 0 exists in the session, named "swapper" (the idle task). */
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *idle = perf_session__findnew(self, 0);

	if (idle != NULL && thread__set_comm(idle, "swapper") == 0)
		return idle;

	pr_err("problem inserting idle task.\n");
	return NULL;
}

861 862 863
static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
864
	if (ops->lost == perf_event__process_lost &&
865
	    session->hists.stats.total_lost != 0) {
866 867
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
868 869 870 871 872 873 874 875 876 877 878 879 880
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

881 882 883 884 885
	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

886 887 888 889 890 891 892 893 894
 	if (session->hists.stats.nr_invalid_chains != 0) {
 		ui__warning("Found invalid callchains!\n\n"
 			    "%u out of %u events were discarded for this reason.\n\n"
 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
 			    session->hists.stats.nr_invalid_chains,
 			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
 	}
}

895 896 897 898 899 900
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

/*
 * Read and process events from a pipe (stdin) until EOF or
 * session_done().  Each iteration reads the fixed header, then the
 * variable payload, then dispatches.  On an unprocessable event the
 * stream is resynchronized by re-aligning to 8 bytes and skipping a
 * single u64.
 */
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		/* err == 0 is clean EOF, not an error */
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	/* read the payload just past the header we already have */
	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
/*
 * Return the event at @head inside the current mmap window, or NULL
 * when the event (or even its header) runs past the window and the
 * caller must remap.  Swaps the header in place when needed.
 */
static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

997
int __perf_session__process_events(struct perf_session *session,
998 999
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
1000
{
1001
	u64 head, page_offset, file_offset, file_pos, progress_next;
1002
	int err, mmap_prot, mmap_flags, map_idx = 0;
1003
	struct ui_progress *progress;
1004
	size_t	page_size, mmap_size;
1005
	char *buf, *mmaps[8];
1006
	union perf_event *event;
1007
	uint32_t size;
1008

1009 1010
	perf_event_ops__fill_defaults(ops);

1011
	page_size = sysconf(_SC_PAGESIZE);
1012

1013 1014 1015
	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;
1016

1017 1018 1019
	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

1020 1021 1022 1023 1024 1025 1026 1027 1028
	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

1029 1030
	memset(mmaps, 0, sizeof(mmaps));

1031 1032 1033
	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

1034
	if (session->header.needs_swap) {
1035 1036 1037
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
1038
remap:
1039 1040
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
1041 1042 1043 1044 1045
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
1046 1047
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
1048
	file_pos = file_offset + head;
1049 1050

more:
1051 1052
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
1053 1054 1055 1056
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}
1057

1058 1059 1060
		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
1061 1062 1063 1064 1065
		goto remap;
	}

	size = event->header.size;

1066 1067
	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
1068
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
1069
			    file_offset + head, event->header.size,
1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
1082
	file_pos += size;
1083

1084 1085 1086 1087 1088
	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

1089
	if (file_pos < file_size)
1090
		goto more;
1091

1092
	err = 0;
1093
	/* do the final flush for ordered samples */
1094 1095
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
1096
out_err:
1097
	ui_progress__delete(progress);
1098
	perf_session__warn_about_errors(session, ops);
1099
	perf_session_free_sample_buffers(session);
1100 1101
	return err;
}
1102

1103 1104 1105 1106 1107 1108 1109 1110
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

1111 1112 1113 1114 1115 1116 1117
	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
1118

1119 1120 1121
	return err;
}

1122
bool perf_session__has_traces(struct perf_session *self, const char *msg)
1123 1124
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
1125 1126
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
1127 1128
	}

1129
	return true;
1130
}
1131

1132
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
1133 1134 1135 1136
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
1137
	enum map_type i;
1138 1139 1140 1141 1142
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;
1143

1144 1145 1146
	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
1147
		return -ENOMEM;
1148
	}
1149

1150
	bracket = strchr(ref->name, ']');
1151 1152 1153
	if (bracket)
		*bracket = '\0';

1154
	ref->addr = addr;
1155 1156

	for (i = 0; i < MAP__NR_TYPES; ++i) {
1157 1158
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
1159 1160
	}

1161 1162
	return 0;
}
1163 1164 1165 1166 1167 1168 1169

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}
1170 1171 1172 1173 1174 1175 1176

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}
1192

1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204
struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
					      unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

1205 1206 1207
void perf_session__print_ip(union perf_event *event,
			    struct perf_sample *sample,
			    struct perf_session *session,
1208
			    int print_sym, int print_dso)
1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &session->callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
			event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {

		if (perf_session__resolve_callchain(session, al.thread,
						sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

1237 1238 1239 1240 1241 1242
			printf("\t%16" PRIx64, node->ip);
			if (print_sym) {
				if (node->sym && node->sym->name)
					symname = node->sym->name;
				else
					symname = "";
1243

1244 1245 1246
				printf(" %s", symname);
			}
			if (print_dso) {
1247 1248 1249 1250
				if (node->map && node->map->dso && node->map->dso->name)
					dsoname = node->map->dso->name;
				else
					dsoname = "";
1251

1252
				printf(" (%s)", dsoname);
1253 1254
			}
			printf("\n");
1255 1256 1257 1258 1259

			callchain_cursor_advance(cursor);
		}

	} else {
1260 1261 1262 1263 1264 1265
		printf("%16" PRIx64, al.addr);
		if (print_sym) {
			if (al.sym && al.sym->name)
				symname = al.sym->name;
			else
				symname = "";
1266

1267 1268 1269 1270
			printf(" %s", symname);
		}

		if (print_dso) {
1271 1272 1273 1274
			if (al.map && al.map->dso && al.map->dso->name)
				dsoname = al.map->dso->name;
			else
				dsoname = "";
1275

1276
			printf(" (%s)", dsoname);
1277
		}
1278 1279
	}
}