// SPDX-License-Identifier: GPL-2.0
#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
#include <regex.h>
#include <stdlib.h>
#include "callchain.h"
#include "debug.h"
#include "dso.h"
#include "env.h"
#include "event.h"
#include "evsel.h"
#include "hist.h"
#include "machine.h"
#include "map.h"
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
#include "strlist.h"
#include "target.h"
#include "thread.h"
#include "util.h"
#include "vdso.h"
#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "unwind.h"
#include "linux/hash.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <internal/lib.h> // page_size

#include <linux/ctype.h>
#include <symbol/kallsyms.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/zalloc.h>

static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);

static struct dso *machine__kernel_dso(struct machine *machine)
{
	return machine->vmlinux_map->dso;
}

static void dsos__init(struct dsos *dsos)
{
	INIT_LIST_HEAD(&dsos->head);
	dsos->root = RB_ROOT;
	init_rwsem(&dsos->lock);
}

static void machine__threads_init(struct machine *machine)
{
	int i;

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		struct threads *threads = &machine->threads[i];
		threads->entries = RB_ROOT_CACHED;
		init_rwsem(&threads->lock);
		threads->nr = 0;
		INIT_LIST_HEAD(&threads->dead);
		threads->last_match = NULL;
	}
}

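/*
 * Set machine->mmap_name to the name used for kernel MMAP events on this
 * machine: "[kernel.kallsyms]" for the host, "[guest.kernel.kallsyms]" for
 * the default guest and "[guest.kernel.kallsyms.<pid>]" for other guests.
 */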
static int machine__set_mmap_name(struct machine *machine)
{
	if (machine__is_host(machine))
		machine->mmap_name = strdup("[kernel.kallsyms]");
	else if (machine__is_default_guest(machine))
		machine->mmap_name = strdup("[guest.kernel.kallsyms]");
	else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
			  machine->pid) < 0)
		machine->mmap_name = NULL;

	return machine->mmap_name ? 0 : -ENOMEM;
}

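/*
 * Initialize an already allocated struct machine: thread tables, DSO lists,
 * kernel map state and the root_dir/mmap_name strings.  For guest machines
 * (pid != HOST_KERNEL_ID) a "[guest/<pid>]" thread is created as well.
 */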
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
	int err = -ENOMEM;

	memset(machine, 0, sizeof(*machine));
	maps__init(&machine->kmaps, machine);
	RB_CLEAR_NODE(&machine->rb_node);
	dsos__init(&machine->dsos);

	machine__threads_init(machine);

	machine->vdso_info = NULL;
	machine->env = NULL;

	machine->pid = pid;

	machine->id_hdr_size = 0;
	machine->kptr_restrict_warned = false;
	machine->comm_exec = false;
	machine->kernel_start = 0;
	machine->vmlinux_map = NULL;

	machine->root_dir = strdup(root_dir);
	if (machine->root_dir == NULL)
		return -ENOMEM;

	if (machine__set_mmap_name(machine))
		goto out;

	if (pid != HOST_KERNEL_ID) {
		struct thread *thread = machine__findnew_thread(machine, -1,
								pid);
		char comm[64];

		if (thread == NULL)
			goto out;

		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
		thread__set_comm(thread, comm, 0);
		thread__put(thread);
	}

	machine->current_tid = NULL;
	err = 0;

out:
	if (err) {
		zfree(&machine->root_dir);
		zfree(&machine->mmap_name);
	}
	return 0;
}

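/*
 * Typical usage (sketch): allocate a machine for the running host, including
 * its kernel maps, and delete it when done:
 *
 *	struct machine *machine = machine__new_host();
 *
 *	if (machine != NULL) {
 *		...
 *		machine__delete(machine);
 *	}
 */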
struct machine *machine__new_host(void)
{
	struct machine *machine = malloc(sizeof(*machine));

	if (machine != NULL) {
		machine__init(machine, "", HOST_KERNEL_ID);

		if (machine__create_kernel_maps(machine) < 0)
			goto out_delete;
	}

	return machine;
out_delete:
	free(machine);
	return NULL;
}

struct machine *machine__new_kallsyms(void)
{
	struct machine *machine = machine__new_host();
	/*
	 * FIXME:
	 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
	 *    ask for not using the kcore parsing code, once this one is fixed
	 *    to create a map per module.
	 */
	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
		machine__delete(machine);
		machine = NULL;
	}

	return machine;
}

static void dsos__purge(struct dsos *dsos)
{
	struct dso *pos, *n;

	down_write(&dsos->lock);

	list_for_each_entry_safe(pos, n, &dsos->head, node) {
		RB_CLEAR_NODE(&pos->rb_node);
		pos->root = NULL;
		list_del_init(&pos->node);
		dso__put(pos);
	}

	up_write(&dsos->lock);
}

static void dsos__exit(struct dsos *dsos)
{
	dsos__purge(dsos);
	exit_rwsem(&dsos->lock);
}

void machine__delete_threads(struct machine *machine)
{
	struct rb_node *nd;
	int i;

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		struct threads *threads = &machine->threads[i];
		down_write(&threads->lock);
		nd = rb_first_cached(&threads->entries);
		while (nd) {
			struct thread *t = rb_entry(nd, struct thread, rb_node);

			nd = rb_next(nd);
			__machine__remove_thread(machine, t, false);
		}
		up_write(&threads->lock);
	}
}

void machine__exit(struct machine *machine)
{
	int i;

	if (machine == NULL)
		return;

	machine__destroy_kernel_maps(machine);
	maps__exit(&machine->kmaps);
	dsos__exit(&machine->dsos);
	machine__exit_vdso(machine);
	zfree(&machine->root_dir);
	zfree(&machine->mmap_name);
	zfree(&machine->current_tid);

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		struct threads *threads = &machine->threads[i];
		struct thread *thread, *n;
		/*
		 * Forget about the dead: at this point, whatever threads were
		 * left on the dead lists better have a reference count taken
		 * by whoever is using them, and then, when they drop those
		 * references and it finally hits zero, thread__put() will
		 * check and see that it's not in the dead threads list and
		 * will not try to remove it from there, just calling
		 * thread__delete() straight away.
		 */
		list_for_each_entry_safe(thread, n, &threads->dead, node)
			list_del_init(&thread->node);

		exit_rwsem(&threads->lock);
	}
}

void machine__delete(struct machine *machine)
{
	if (machine) {
		machine__exit(machine);
		free(machine);
	}
}

void machines__init(struct machines *machines)
{
	machine__init(&machines->host, "", HOST_KERNEL_ID);
	machines->guests = RB_ROOT_CACHED;
}

void machines__exit(struct machines *machines)
{
	machine__exit(&machines->host);
	/* XXX exit guest */
}

struct machine *machines__add(struct machines *machines, pid_t pid,
			      const char *root_dir)
{
	struct rb_node **p = &machines->guests.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct machine *pos, *machine = malloc(sizeof(*machine));
	bool leftmost = true;

	if (machine == NULL)
		return NULL;

	if (machine__init(machine, root_dir, pid) != 0) {
		free(machine);
		return NULL;
	}

	while (*p != NULL) {
		parent = *p;
		pos = rb_entry(parent, struct machine, rb_node);
		if (pid < pos->pid)
			p = &(*p)->rb_left;
		else {
			p = &(*p)->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&machine->rb_node, parent, p);
	rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);

	return machine;
}

void machines__set_comm_exec(struct machines *machines, bool comm_exec)
{
	struct rb_node *nd;

	machines->host.comm_exec = comm_exec;

	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
		struct machine *machine = rb_entry(nd, struct machine, rb_node);

		machine->comm_exec = comm_exec;
	}
}

struct machine *machines__find(struct machines *machines, pid_t pid)
{
	struct rb_node **p = &machines->guests.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct machine *machine;
	struct machine *default_machine = NULL;

	if (pid == HOST_KERNEL_ID)
		return &machines->host;

	while (*p != NULL) {
		parent = *p;
		machine = rb_entry(parent, struct machine, rb_node);
		if (pid < machine->pid)
			p = &(*p)->rb_left;
		else if (pid > machine->pid)
			p = &(*p)->rb_right;
		else
			return machine;
		if (!machine->pid)
			default_machine = machine;
	}

	return default_machine;
}

struct machine *machines__findnew(struct machines *machines, pid_t pid)
{
	char path[PATH_MAX];
	const char *root_dir = "";
	struct machine *machine = machines__find(machines, pid);

	if (machine && (machine->pid == pid))
		goto out;

	if ((pid != HOST_KERNEL_ID) &&
	    (pid != DEFAULT_GUEST_KERNEL_ID) &&
	    (symbol_conf.guestmount)) {
		sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
		if (access(path, R_OK)) {
			static struct strlist *seen;

			if (!seen)
				seen = strlist__new(NULL, NULL);

			if (!strlist__has_entry(seen, path)) {
				pr_err("Can't access file %s\n", path);
				strlist__add(seen, path);
			}
			machine = NULL;
			goto out;
		}
		root_dir = path;
	}

	machine = machines__add(machines, pid, root_dir);
out:
	return machine;
}

void machines__process_guests(struct machines *machines,
			      machine__process_t process, void *data)
{
	struct rb_node *nd;

	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
		struct machine *pos = rb_entry(nd, struct machine, rb_node);
		process(pos, data);
	}
}

void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
{
	struct rb_node *node;
	struct machine *machine;

	machines->host.id_hdr_size = id_hdr_size;

	for (node = rb_first_cached(&machines->guests); node;
	     node = rb_next(node)) {
		machine = rb_entry(node, struct machine, rb_node);
		machine->id_hdr_size = id_hdr_size;
	}

	return;
}

static void machine__update_thread_pid(struct machine *machine,
				       struct thread *th, pid_t pid)
{
	struct thread *leader;

	if (pid == th->pid_ || pid == -1 || th->pid_ != -1)
		return;

	th->pid_ = pid;

	if (th->pid_ == th->tid)
		return;

	leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
	if (!leader)
		goto out_err;

	if (!leader->maps)
		leader->maps = maps__new(machine);

	if (!leader->maps)
		goto out_err;

	if (th->maps == leader->maps)
		return;

	if (th->maps) {
		/*
		 * Maps are created from MMAP events which provide the pid and
		 * tid.  Consequently there never should be any maps on a thread
		 * with an unknown pid.  Just print an error if there are.
		 */
		if (!maps__empty(th->maps))
			pr_err("Discarding thread maps for %d:%d\n",
			       th->pid_, th->tid);
		maps__put(th->maps);
	}

	th->maps = maps__get(leader->maps);
out_put:
	thread__put(leader);
	return;
out_err:
	pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
	goto out_put;
}

/*
 * Front-end cache - TID lookups come in blocks,
 * so most of the time we don't have to look up
 * the full rbtree:
 */
static struct thread*
__threads__get_last_match(struct threads *threads, struct machine *machine,
			  int pid, int tid)
{
	struct thread *th;

	th = threads->last_match;
	if (th != NULL) {
		if (th->tid == tid) {
			machine__update_thread_pid(machine, th, pid);
			return thread__get(th);
		}

		threads->last_match = NULL;
	}

	return NULL;
}

static struct thread*
threads__get_last_match(struct threads *threads, struct machine *machine,
			int pid, int tid)
{
	struct thread *th = NULL;

	if (perf_singlethreaded)
		th = __threads__get_last_match(threads, machine, pid, tid);

	return th;
}

static void
__threads__set_last_match(struct threads *threads, struct thread *th)
{
	threads->last_match = th;
}

static void
threads__set_last_match(struct threads *threads, struct thread *th)
{
	if (perf_singlethreaded)
		__threads__set_last_match(threads, th);
}

/*
 * Caller must eventually drop thread->refcnt returned with a successful
 * lookup/new thread inserted.
 */
static struct thread *____machine__findnew_thread(struct machine *machine,
						  struct threads *threads,
						  pid_t pid, pid_t tid,
						  bool create)
{
	struct rb_node **p = &threads->entries.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct thread *th;
	bool leftmost = true;

	th = threads__get_last_match(threads, machine, pid, tid);
	if (th)
		return th;

	while (*p != NULL) {
		parent = *p;
		th = rb_entry(parent, struct thread, rb_node);

		if (th->tid == tid) {
			threads__set_last_match(threads, th);
			machine__update_thread_pid(machine, th, pid);
			return thread__get(th);
		}

		if (tid < th->tid)
			p = &(*p)->rb_left;
		else {
			p = &(*p)->rb_right;
			leftmost = false;
		}
	}

	if (!create)
		return NULL;

	th = thread__new(pid, tid);
	if (th != NULL) {
		rb_link_node(&th->rb_node, parent, p);
		rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);

		/*
		 * We have to initialize maps separately after rb tree is updated.
		 *
		 * The reason is that we call machine__findnew_thread
		 * within thread__init_maps to find the thread
		 * leader and that would screw up the rb tree.
		 */
		if (thread__init_maps(th, machine)) {
			rb_erase_cached(&th->rb_node, &threads->entries);
			RB_CLEAR_NODE(&th->rb_node);
			thread__put(th);
			return NULL;
		}
		/*
		 * It is now in the rbtree, get a ref
		 */
		thread__get(th);
		threads__set_last_match(threads, th);
		++threads->nr;
	}

	return th;
}

struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
{
	return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
}

struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
				       pid_t tid)
{
	struct threads *threads = machine__threads(machine, tid);
	struct thread *th;

	down_write(&threads->lock);
	th = __machine__findnew_thread(machine, pid, tid);
	up_write(&threads->lock);
	return th;
}

struct thread *machine__find_thread(struct machine *machine, pid_t pid,
				    pid_t tid)
{
	struct threads *threads = machine__threads(machine, tid);
	struct thread *th;

	down_read(&threads->lock);
	th = ____machine__findnew_thread(machine, threads, pid, tid, false);
	up_read(&threads->lock);
	return th;
}

struct comm *machine__thread_exec_comm(struct machine *machine,
				       struct thread *thread)
{
	if (machine->comm_exec)
		return thread__exec_comm(thread);
	else
		return thread__comm(thread);
}

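/*
 * Handle PERF_RECORD_COMM: find or create the thread and record its new comm,
 * noting whether the event was generated by exec().
 */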
int machine__process_comm_event(struct machine *machine, union perf_event *event,
				struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(machine,
							event->comm.pid,
							event->comm.tid);
	bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
	int err = 0;

	if (exec)
		machine->comm_exec = true;

	if (dump_trace)
		perf_event__fprintf_comm(event, stdout);

	if (thread == NULL ||
	    __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
		err = -1;
	}

	thread__put(thread);

	return err;
}

int machine__process_namespaces_event(struct machine *machine __maybe_unused,
				      union perf_event *event,
				      struct perf_sample *sample __maybe_unused)
{
	struct thread *thread = machine__findnew_thread(machine,
							event->namespaces.pid,
							event->namespaces.tid);
	int err = 0;

	WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
		  "\nWARNING: kernel seems to support more namespaces than perf"
		  " tool.\nTry updating the perf tool..\n\n");

	WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
		  "\nWARNING: perf tool seems to support more namespaces than"
		  " the kernel.\nTry updating the kernel..\n\n");

	if (dump_trace)
		perf_event__fprintf_namespaces(event, stdout);

	if (thread == NULL ||
	    thread__set_namespaces(thread, sample->time, &event->namespaces)) {
		dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
		err = -1;
	}

	thread__put(thread);

	return err;
}

int machine__process_lost_event(struct machine *machine __maybe_unused,
				union perf_event *event, struct perf_sample *sample __maybe_unused)
{
	dump_printf(": id:%" PRI_lu64 ": lost:%" PRI_lu64 "\n",
		    event->lost.id, event->lost.lost);
	return 0;
}

int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
					union perf_event *event, struct perf_sample *sample)
{
	dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n",
		    sample->id, event->lost_samples.lost);
	return 0;
}

static struct dso *machine__findnew_module_dso(struct machine *machine,
					       struct kmod_path *m,
					       const char *filename)
{
	struct dso *dso;

	down_write(&machine->dsos.lock);

	dso = __dsos__find(&machine->dsos, m->name, true);
	if (!dso) {
		dso = __dsos__addnew(&machine->dsos, m->name);
		if (dso == NULL)
			goto out_unlock;

		dso__set_module_info(dso, m, machine);
		dso__set_long_name(dso, strdup(filename), true);
		dso->kernel = DSO_TYPE_KERNEL;
	}

	dso__get(dso);
out_unlock:
	up_write(&machine->dsos.lock);
	return dso;
}

int machine__process_aux_event(struct machine *machine __maybe_unused,
			       union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_aux(event, stdout);
	return 0;
}

int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
					union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_itrace_start(event, stdout);
	return 0;
}

int machine__process_switch_event(struct machine *machine __maybe_unused,
				  union perf_event *event)
{
	if (dump_trace)
		perf_event__fprintf_switch(event, stdout);
	return 0;
}

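/*
 * Handle registration of a kernel symbol (e.g. a JITed BPF program): create
 * a kernel map/DSO covering [addr, addr + len) if needed and insert the
 * symbol into it.
 */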
static int machine__process_ksymbol_register(struct machine *machine,
					     union perf_event *event,
					     struct perf_sample *sample __maybe_unused)
{
	struct symbol *sym;
	struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr);

	if (!map) {
		struct dso *dso = dso__new(event->ksymbol.name);

		if (dso) {
			dso->kernel = DSO_TYPE_KERNEL;
			map = map__new2(0, dso);
		}

		if (!dso || !map) {
			dso__put(dso);
			return -ENOMEM;
		}

		map->start = event->ksymbol.addr;
		map->end = map->start + event->ksymbol.len;
		maps__insert(&machine->kmaps, map);
	}

	sym = symbol__new(map->map_ip(map, map->start),
			  event->ksymbol.len,
			  0, 0, event->ksymbol.name);
	if (!sym)
		return -ENOMEM;
	dso__insert_symbol(map->dso, sym);
	return 0;
}

static int machine__process_ksymbol_unregister(struct machine *machine,
					       union perf_event *event,
					       struct perf_sample *sample __maybe_unused)
{
	struct map *map;

	map = maps__find(&machine->kmaps, event->ksymbol.addr);
	if (map)
		maps__remove(&machine->kmaps, map);

	return 0;
}

int machine__process_ksymbol(struct machine *machine __maybe_unused,
			     union perf_event *event,
			     struct perf_sample *sample)
{
	if (dump_trace)
		perf_event__fprintf_ksymbol(event, stdout);

	if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
		return machine__process_ksymbol_unregister(machine, event,
							   sample);
	return machine__process_ksymbol_register(machine, event, sample);
}

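/*
 * Create a map for a kernel module starting at 'start', backed by a DSO
 * looked up (or created) from the module path, and insert it into the
 * machine's kernel maps.
 */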
static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
					      const char *filename)
{
	struct map *map = NULL;
	struct kmod_path m;
	struct dso *dso;

	if (kmod_path__parse_name(&m, filename))
		return NULL;

	dso = machine__findnew_module_dso(machine, &m, filename);
	if (dso == NULL)
		goto out;

	map = map__new2(start, dso);
	if (map == NULL)
		goto out;

	maps__insert(&machine->kmaps, map);

	/* Put the map here because maps__insert already got it */
	map__put(map);
out:
	/* put the dso here, corresponding to machine__findnew_module_dso */
	dso__put(dso);
	zfree(&m.name);
	return map;
}

size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
{
	struct rb_node *nd;
	size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);

	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
		struct machine *pos = rb_entry(nd, struct machine, rb_node);
		ret += __dsos__fprintf(&pos->dsos.head, fp);
	}

	return ret;
}

size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
				     bool (skip)(struct dso *dso, int parm), int parm)
{
	return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
}

size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
				     bool (skip)(struct dso *dso, int parm), int parm)
{
	struct rb_node *nd;
	size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);

	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
		struct machine *pos = rb_entry(nd, struct machine, rb_node);
		ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
	}
	return ret;
}

size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
{
	int i;
	size_t printed = 0;
	struct dso *kdso = machine__kernel_dso(machine);

	if (kdso->has_build_id) {
		char filename[PATH_MAX];
		if (dso__build_id_filename(kdso, filename, sizeof(filename),
					   false))
			printed += fprintf(fp, "[0] %s\n", filename);
	}

	for (i = 0; i < vmlinux_path__nr_entries; ++i)
		printed += fprintf(fp, "[%d] %s\n",
				   i + kdso->has_build_id, vmlinux_path[i]);

	return printed;
}

size_t machine__fprintf(struct machine *machine, FILE *fp)
{
	struct rb_node *nd;
	size_t ret;
	int i;

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		struct threads *threads = &machine->threads[i];

		down_read(&threads->lock);

		ret = fprintf(fp, "Threads: %u\n", threads->nr);

		for (nd = rb_first_cached(&threads->entries); nd;
		     nd = rb_next(nd)) {
			struct thread *pos = rb_entry(nd, struct thread, rb_node);

			ret += thread__fprintf(pos, fp);
		}

		up_read(&threads->lock);
	}
	return ret;
}

static struct dso *machine__get_kernel(struct machine *machine)
{
	const char *vmlinux_name = machine->mmap_name;
	struct dso *kernel;

	if (machine__is_host(machine)) {
		if (symbol_conf.vmlinux_name)
			vmlinux_name = symbol_conf.vmlinux_name;

		kernel = machine__findnew_kernel(machine, vmlinux_name,
						 "[kernel]", DSO_TYPE_KERNEL);
	} else {
		if (symbol_conf.default_guest_vmlinux_name)
			vmlinux_name = symbol_conf.default_guest_vmlinux_name;

		kernel = machine__findnew_kernel(machine, vmlinux_name,
						 "[guest.kernel]",
						 DSO_TYPE_GUEST_KERNEL);
	}

	if (kernel != NULL && (!kernel->has_build_id))
		dso__read_running_kernel_build_id(kernel, machine);

	return kernel;
}

struct process_args {
	u64 start;
};

void machine__get_kallsyms_filename(struct machine *machine, char *buf,
				    size_t bufsz)
{
	if (machine__is_default_guest(machine))
		scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
	else
		scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir);
}

const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};

/* Figure out the start address of the kernel map from /proc/kallsyms.
 * Returns the name of the start symbol in *symbol_name. Pass in NULL as
 * symbol_name if it's not that important.
 */
static int machine__get_running_kernel_start(struct machine *machine,
					     const char **symbol_name,
					     u64 *start, u64 *end)
{
	char filename[PATH_MAX];
	int i, err = -1;
	const char *name;
	u64 addr = 0;

	machine__get_kallsyms_filename(machine, filename, PATH_MAX);

	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
		return 0;

	for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
		err = kallsyms__get_function_start(filename, name, &addr);
		if (!err)
			break;
	}

	if (err)
		return -1;

	if (symbol_name)
		*symbol_name = name;

	*start = addr;

	err = kallsyms__get_function_start(filename, "_etext", &addr);
	if (!err)
		*end = addr;

	return 0;
}

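/*
 * Add an extra kernel map (e.g. an entry trampoline) described by 'xm',
 * backed by the kernel DSO, to the machine's kernel maps.
 */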
int machine__create_extra_kernel_map(struct machine *machine,
				     struct dso *kernel,
				     struct extra_kernel_map *xm)
{
	struct kmap *kmap;
	struct map *map;

	map = map__new2(xm->start, kernel);
	if (!map)
		return -1;

	map->end   = xm->end;
	map->pgoff = xm->pgoff;

	kmap = map__kmap(map);

	strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);

	maps__insert(&machine->kmaps, map);

	pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
		  kmap->name, map->start, map->end);

	map__put(map);

	return 0;
}

static u64 find_entry_trampoline(struct dso *dso)
{
	/* Duplicates are removed so lookup all aliases */
	const char *syms[] = {
		"_entry_trampoline",
		"__entry_trampoline_start",
		"entry_SYSCALL_64_trampoline",
	};
	struct symbol *sym = dso__first_symbol(dso);
	unsigned int i;

	for (; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding != STB_GLOBAL)
			continue;
		for (i = 0; i < ARRAY_SIZE(syms); i++) {
			if (!strcmp(sym->name, syms[i]))
				return sym->start;
		}
	}

	return 0;
}

/*
 * These values can be used for kernels that do not have symbols for the entry
 * trampolines in kallsyms.
 */
#define X86_64_CPU_ENTRY_AREA_PER_CPU	0xfffffe0000000000ULL
#define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
#define X86_64_ENTRY_TRAMPOLINE		0x6000

/* Map x86_64 PTI entry trampolines */
int machine__map_x86_64_entry_trampolines(struct machine *machine,
					  struct dso *kernel)
{
	struct maps *kmaps = &machine->kmaps;
	int nr_cpus_avail, cpu;
	bool found = false;
	struct map *map;
	u64 pgoff;

	/*
	 * In the vmlinux case, pgoff is a virtual address which must now be
	 * mapped to a vmlinux offset.
	 */
	maps__for_each_entry(kmaps, map) {
		struct kmap *kmap = __map__kmap(map);
		struct map *dest_map;

		if (!kmap || !is_entry_trampoline(kmap->name))
			continue;

		dest_map = maps__find(kmaps, map->pgoff);
		if (dest_map != map)
			map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
		found = true;
	}
	if (found || machine->trampolines_mapped)
		return 0;

	pgoff = find_entry_trampoline(kernel);
	if (!pgoff)
		return 0;

	nr_cpus_avail = machine__nr_cpus_avail(machine);

	/* Add a 1 page map for each CPU's entry trampoline */
	for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
		u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
			 cpu * X86_64_CPU_ENTRY_AREA_SIZE +
			 X86_64_ENTRY_TRAMPOLINE;
		struct extra_kernel_map xm = {
			.start = va,
			.end   = va + page_size,
			.pgoff = pgoff,
		};

		strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);

		if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
			return -1;
	}

	machine->trampolines_mapped = nr_cpus_avail;

	return 0;
}

int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
					     struct dso *kernel __maybe_unused)
{
	return 0;
}

static int
__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
{
	/* In case the kernel map is being renewed, destroy the previous one */
	machine__destroy_kernel_maps(machine);

	machine->vmlinux_map = map__new2(0, kernel);
	if (machine->vmlinux_map == NULL)
		return -1;

	machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
	maps__insert(&machine->kmaps, machine->vmlinux_map);
	return 0;
}

void machine__destroy_kernel_maps(struct machine *machine)
{
	struct kmap *kmap;
	struct map *map = machine__kernel_map(machine);

	if (map == NULL)
		return;

	kmap = map__kmap(map);
	maps__remove(&machine->kmaps, map);
	if (kmap && kmap->ref_reloc_sym) {
		zfree((char **)&kmap->ref_reloc_sym->name);
		zfree(&kmap->ref_reloc_sym);
	}

	map__zput(machine->vmlinux_map);
}

int machines__create_guest_kernel_maps(struct machines *machines)
{
	int ret = 0;
	struct dirent **namelist = NULL;
	int i, items = 0;
	char path[PATH_MAX];
	pid_t pid;
	char *endp;

	if (symbol_conf.default_guest_vmlinux_name ||
	    symbol_conf.default_guest_modules ||
	    symbol_conf.default_guest_kallsyms) {
		machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
	}

	if (symbol_conf.guestmount) {
		items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
		if (items <= 0)
			return -ENOENT;
		for (i = 0; i < items; i++) {
			if (!isdigit(namelist[i]->d_name[0])) {
				/* Filter out . and .. */
				continue;
			}
			pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
			if ((*endp != '\0') ||
			    (endp == namelist[i]->d_name) ||
			    (errno == ERANGE)) {
				pr_debug("invalid directory (%s). Skipping.\n",
					 namelist[i]->d_name);
				continue;
			}
			sprintf(path, "%s/%s/proc/kallsyms",
				symbol_conf.guestmount,
				namelist[i]->d_name);
			ret = access(path, R_OK);
			if (ret) {
				pr_debug("Can't access file %s\n", path);
				goto failure;
			}
			machines__create_kernel_maps(machines, pid);
		}
failure:
		free(namelist);
	}

	return ret;
}

void machines__destroy_kernel_maps(struct machines *machines)
{
	struct rb_node *next = rb_first_cached(&machines->guests);

	machine__destroy_kernel_maps(&machines->host);

	while (next) {
		struct machine *pos = rb_entry(next, struct machine, rb_node);

		next = rb_next(&pos->rb_node);
		rb_erase_cached(&pos->rb_node, &machines->guests);
		machine__delete(pos);
	}
}

int machines__create_kernel_maps(struct machines *machines, pid_t pid)
{
	struct machine *machine = machines__findnew(machines, pid);

	if (machine == NULL)
		return -1;

	return machine__create_kernel_maps(machine);
}

int machine__load_kallsyms(struct machine *machine, const char *filename)
{
	struct map *map = machine__kernel_map(machine);
	int ret = __dso__load_kallsyms(map->dso, filename, map, true);

	if (ret > 0) {
		dso__set_loaded(map->dso);
		/*
		 * Since /proc/kallsyms will have multiple sections for the
		 * kernel, with modules between them, fixup the end of all
		 * sections.
		 */
		maps__fixup_end(&machine->kmaps);
	}

	return ret;
}

int machine__load_vmlinux_path(struct machine *machine)
{
	struct map *map = machine__kernel_map(machine);
	int ret = dso__load_vmlinux_path(map->dso, map);

	if (ret > 0)
		dso__set_loaded(map->dso);

	return ret;
}

static char *get_kernel_version(const char *root_dir)
{
	char version[PATH_MAX];
	FILE *file;
	char *name, *tmp;
	const char *prefix = "Linux version ";

	sprintf(version, "%s/proc/version", root_dir);
	file = fopen(version, "r");
	if (!file)
		return NULL;

	tmp = fgets(version, sizeof(version), file);
	fclose(file);
	if (!tmp)
		return NULL;

	name = strstr(version, prefix);
	if (!name)
		return NULL;
	name += strlen(prefix);
	tmp = strchr(name, ' ');
	if (tmp)
		*tmp = '\0';

	return strdup(name);
}

static bool is_kmod_dso(struct dso *dso)
{
	return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
	       dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
}

static int maps__set_module_path(struct maps *maps, const char *path, struct kmod_path *m)
{
	char *long_name;
	struct map *map = maps__find_by_name(maps, m->name);

	if (map == NULL)
		return 0;

	long_name = strdup(path);
	if (long_name == NULL)
		return -ENOMEM;

	dso__set_long_name(map->dso, long_name, true);
	dso__kernel_module_get_build_id(map->dso, "");

	/*
	 * The full name could reveal kmod compression, so
	 * update the symtab_type if needed.
	 */
	if (m->comp && is_kmod_dso(map->dso)) {
		map->dso->symtab_type++;
		map->dso->comp = m->comp;
	}

	return 0;
}

static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth)
{
	struct dirent *dent;
	DIR *dir = opendir(dir_name);
	int ret = 0;

	if (!dir) {
		pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
		return -1;
	}

	while ((dent = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		struct stat st;

		/* sshfs might return bad dent->d_type, so we have to stat */
		snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
		if (stat(path, &st))
			continue;

		if (S_ISDIR(st.st_mode)) {
			if (!strcmp(dent->d_name, ".") ||
			    !strcmp(dent->d_name, ".."))
				continue;

			/* Do not follow top-level source and build symlinks */
			if (depth == 0) {
				if (!strcmp(dent->d_name, "source") ||
				    !strcmp(dent->d_name, "build"))
					continue;
			}

			ret = maps__set_modules_path_dir(maps, path, depth + 1);
			if (ret < 0)
				goto out;
		} else {
			struct kmod_path m;

			ret = kmod_path__parse_name(&m, dent->d_name);
			if (ret)
				goto out;

			if (m.kmod)
				ret = maps__set_module_path(maps, path, &m);

			zfree(&m.name);

			if (ret)
				goto out;
		}
	}

out:
	closedir(dir);
	return ret;
}

static int machine__set_modules_path(struct machine *machine)
{
	char *version;
	char modules_path[PATH_MAX];

	version = get_kernel_version(machine->root_dir);
	if (!version)
		return -1;

	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s",
		 machine->root_dir, version);
	free(version);

	return maps__set_modules_path_dir(&machine->kmaps, modules_path, 0);
}
int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
				u64 *size __maybe_unused,
				const char *name __maybe_unused)
{
	return 0;
}

static int machine__create_module(void *arg, const char *name, u64 start,
				  u64 size)
{
	struct machine *machine = arg;
	struct map *map;

	if (arch__fix_module_text_start(&start, &size, name) < 0)
		return -1;

	map = machine__addnew_module_map(machine, start, name);
	if (map == NULL)
		return -1;
	map->end = start + size;

	dso__kernel_module_get_build_id(map->dso, machine->root_dir);

	return 0;
}

static int machine__create_modules(struct machine *machine)
{
	const char *modules;
	char path[PATH_MAX];

	if (machine__is_default_guest(machine)) {
		modules = symbol_conf.default_guest_modules;
	} else {
		snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
		modules = path;
	}

	if (symbol__restricted_filename(modules, "/proc/modules"))
		return -1;

	if (modules__parse(modules, machine, machine__create_module))
		return -1;

	if (!machine__set_modules_path(machine))
		return 0;

	pr_debug("Problems setting modules path maps, continuing anyway...\n");

	return 0;
}

static void machine__set_kernel_mmap(struct machine *machine,
				     u64 start, u64 end)
{
	machine->vmlinux_map->start = start;
	machine->vmlinux_map->end   = end;
	/*
	 * Be a bit paranoid here, some perf.data file came with
	 * a zero sized synthesized MMAP event for the kernel.
	 */
	if (start == 0 && end == 0)
		machine->vmlinux_map->end = ~0ULL;
}

static void machine__update_kernel_mmap(struct machine *machine,
				     u64 start, u64 end)
{
	struct map *map = machine__kernel_map(machine);

	map__get(map);
	maps__remove(&machine->kmaps, map);

	machine__set_kernel_mmap(machine, start, end);

	maps__insert(&machine->kmaps, map);
	map__put(map);
}

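/*
 * Create the vmlinux map for this machine's kernel DSO, load module maps if
 * requested, and fix up the kernel map start/end using /proc/kallsyms when
 * available.
 */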
int machine__create_kernel_maps(struct machine *machine)
{
	struct dso *kernel = machine__get_kernel(machine);
	const char *name = NULL;
	struct map *map;
	u64 start = 0, end = ~0ULL;
	int ret;

	if (kernel == NULL)
		return -1;

	ret = __machine__create_kernel_maps(machine, kernel);
	if (ret < 0)
		goto out_put;

	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
		if (machine__is_host(machine))
			pr_debug("Problems creating module maps, "
				 "continuing anyway...\n");
		else
			pr_debug("Problems creating module maps for guest %d, "
				 "continuing anyway...\n", machine->pid);
	}

	if (!machine__get_running_kernel_start(machine, &name, &start, &end)) {
		if (name &&
		    map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) {
			machine__destroy_kernel_maps(machine);
			ret = -1;
			goto out_put;
		}

		/*
		 * we have a real start address now, so re-order the kmaps;
		 * assume it's the last in the kmaps
		 */
		machine__update_kernel_mmap(machine, start, end);
	}

	if (machine__create_extra_kernel_maps(machine, kernel))
		pr_debug("Problems creating extra kernel maps, continuing anyway...\n");

	if (end == ~0ULL) {
		/* update end address of the kernel map using adjacent module address */
		map = map__next(machine__kernel_map(machine));
		if (map)
			machine__set_kernel_mmap(machine, start, map->start);
	}

out_put:
	dso__put(kernel);
	return ret;
}

static bool machine__uses_kcore(struct machine *machine)
{
	struct dso *dso;

	list_for_each_entry(dso, &machine->dsos.head, node) {
		if (dso__is_kcore(dso))
			return true;
	}

	return false;
}

static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
					     union perf_event *event)
{
	return machine__is(machine, "x86_64") &&
	       is_entry_trampoline(event->mmap.filename);
}

static int machine__process_extra_kernel_map(struct machine *machine,
					     union perf_event *event)
{
	struct dso *kernel = machine__kernel_dso(machine);
	struct extra_kernel_map xm = {
		.start = event->mmap.start,
		.end   = event->mmap.start + event->mmap.len,
		.pgoff = event->mmap.pgoff,
	};

	if (kernel == NULL)
		return -1;

	strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);

	return machine__create_extra_kernel_map(machine, kernel, &xm);
}

static int machine__process_kernel_mmap_event(struct machine *machine,
					      union perf_event *event)
{
	struct map *map;
	enum dso_kernel_type kernel_type;
	bool is_kernel_mmap;

	/* If we have maps from kcore then we do not need or want any others */
	if (machine__uses_kcore(machine))
		return 0;

	if (machine__is_host(machine))
		kernel_type = DSO_TYPE_KERNEL;
	else
		kernel_type = DSO_TYPE_GUEST_KERNEL;

	is_kernel_mmap = memcmp(event->mmap.filename,
				machine->mmap_name,
				strlen(machine->mmap_name) - 1) == 0;
	if (event->mmap.filename[0] == '/' ||
	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
		map = machine__addnew_module_map(machine, event->mmap.start,
						 event->mmap.filename);
		if (map == NULL)
			goto out_problem;

		map->end = map->start + event->mmap.len;
	} else if (is_kernel_mmap) {
		const char *symbol_name = (event->mmap.filename +
				strlen(machine->mmap_name));
		/*
		 * Should be there already, from the build-id table in
		 * the header.
		 */
		struct dso *kernel = NULL;
		struct dso *dso;

		down_read(&machine->dsos.lock);

		list_for_each_entry(dso, &machine->dsos.head, node) {

			/*
			 * The cpumode passed to is_kernel_module is not the
			 * cpumode of *this* event. If we insist on passing
			 * the correct cpumode to is_kernel_module, we should
			 * record the cpumode when we add this dso to the
			 * linked list.
			 *
			 * However, we don't really need to pass the correct
			 * cpumode.  We know the correct cpumode must be kernel
			 * mode (if not, we should not link it onto kernel_dsos
			 * list).
			 *
			 * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
			 * is_kernel_module() treats it as a kernel cpumode.
			 */

			if (!dso->kernel ||
			    is_kernel_module(dso->long_name,
					     PERF_RECORD_MISC_CPUMODE_UNKNOWN))
				continue;


			kernel = dso;
			break;
		}

		up_read(&machine->dsos.lock);

		if (kernel == NULL)
			kernel = machine__findnew_dso(machine, machine->mmap_name);
		if (kernel == NULL)
			goto out_problem;

		kernel->kernel = kernel_type;
		if (__machine__create_kernel_maps(machine, kernel) < 0) {
			dso__put(kernel);
			goto out_problem;
		}

		if (strstr(kernel->long_name, "vmlinux"))
			dso__set_short_name(kernel, "[kernel.vmlinux]", false);

		machine__update_kernel_mmap(machine, event->mmap.start,
					 event->mmap.start + event->mmap.len);

		/*
		 * Avoid using a zero address (kptr_restrict) for the ref reloc
		 * symbol. Effectively having zero here means that at record
		 * time /proc/sys/kernel/kptr_restrict was non-zero.
		 */
		if (event->mmap.pgoff != 0) {
			map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
							symbol_name,
							event->mmap.pgoff);
		}

		if (machine__is_default_guest(machine)) {
			/*
			 * preload dso of guest kernel and modules
			 */
			dso__load(kernel, machine__kernel_map(machine));
		}
	} else if (perf_event__is_extra_kernel_mmap(machine, event)) {
		return machine__process_extra_kernel_map(machine, event);
	}
	return 0;
out_problem:
	return -1;
}

int machine__process_mmap2_event(struct machine *machine,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	struct thread *thread;
	struct map *map;
	struct dso_id dso_id = {
		.maj = event->mmap2.maj,
		.min = event->mmap2.min,
		.ino = event->mmap2.ino,
		.ino_generation = event->mmap2.ino_generation,
	};
	int ret = 0;

	if (dump_trace)
		perf_event__fprintf_mmap2(event, stdout);

	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
		ret = machine__process_kernel_mmap_event(machine, event);
		if (ret < 0)
			goto out_problem;
		return 0;
	}

	thread = machine__findnew_thread(machine, event->mmap2.pid,
					event->mmap2.tid);
	if (thread == NULL)
		goto out_problem;

	map = map__new(machine, event->mmap2.start,
			event->mmap2.len, event->mmap2.pgoff,
			&dso_id, event->mmap2.prot,
			event->mmap2.flags,
			event->mmap2.filename, thread);

	if (map == NULL)
		goto out_problem_map;

	ret = thread__insert_map(thread, map);
	if (ret)
		goto out_problem_insert;

	thread__put(thread);
	map__put(map);
	return 0;

out_problem_insert:
	map__put(map);
out_problem_map:
	thread__put(thread);
out_problem:
	dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
	return 0;
}

int machine__process_mmap_event(struct machine *machine, union perf_event *event,
				struct perf_sample *sample)
{
	struct thread *thread;
	struct map *map;
	u32 prot = 0;
	int ret = 0;

	if (dump_trace)
		perf_event__fprintf_mmap(event, stdout);

	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
		ret = machine__process_kernel_mmap_event(machine, event);
		if (ret < 0)
			goto out_problem;
		return 0;
	}

	thread = machine__findnew_thread(machine, event->mmap.pid,
					 event->mmap.tid);
	if (thread == NULL)
		goto out_problem;

	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
		prot = PROT_EXEC;

	map = map__new(machine, event->mmap.start,
			event->mmap.len, event->mmap.pgoff,
			NULL, prot, 0, event->mmap.filename, thread);

	if (map == NULL)
		goto out_problem_map;

	ret = thread__insert_map(thread, map);
	if (ret)
		goto out_problem_insert;

	thread__put(thread);
	map__put(map);
	return 0;

out_problem_insert:
	map__put(map);
out_problem_map:
	thread__put(thread);
out_problem:
	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
	return 0;
}

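/*
 * Remove a thread from the machine's rb tree and move it to the dead list,
 * dropping the rb tree's reference.  'lock' says whether this function should
 * take threads->lock itself; callers that already hold it pass false.
 */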
static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
{
	struct threads *threads = machine__threads(machine, th->tid);

	if (threads->last_match == th)
		threads__set_last_match(threads, NULL);

	if (lock)
		down_write(&threads->lock);

	BUG_ON(refcount_read(&th->refcnt) == 0);

	rb_erase_cached(&th->rb_node, &threads->entries);
	RB_CLEAR_NODE(&th->rb_node);
	--threads->nr;
	/*
	 * Move it first to the dead_threads list, then drop the reference;
	 * if this is the last reference, the thread__delete destructor will
	 * be called and it will be removed from the dead_threads list.
	 */
	list_add_tail(&th->node, &threads->dead);

	/*
	 * We need to do the put here because if this is the last refcount,
	 * then we will be touching the threads->dead head when removing the
	 * thread.
	 */
	thread__put(th);

	if (lock)
		up_write(&threads->lock);
}

void machine__remove_thread(struct machine *machine, struct thread *th)
{
	return __machine__remove_thread(machine, th, true);
}

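/*
 * Handle PERF_RECORD_FORK: create the child thread and clone the parent's
 * maps, unless the event was synthesized (PERF_RECORD_MISC_FORK_EXEC), in
 * which case the map clone is skipped.
 */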
int machine__process_fork_event(struct machine *machine, union perf_event *event,
				struct perf_sample *sample)
{
	struct thread *thread = machine__find_thread(machine,
						     event->fork.pid,
						     event->fork.tid);
	struct thread *parent = machine__findnew_thread(machine,
							event->fork.ppid,
							event->fork.ptid);
	bool do_maps_clone = true;
	int err = 0;

	if (dump_trace)
		perf_event__fprintf_task(event, stdout);

	/*
	 * There may be an existing thread that is not actually the parent,
	 * either because we are processing events out of order, or because the
	 * (fork) event that would have removed the thread was lost. Assume the
	 * latter case and continue on as best we can.
	 */
	if (parent->pid_ != (pid_t)event->fork.ppid) {
		dump_printf("removing erroneous parent thread %d/%d\n",
			    parent->pid_, parent->tid);
		machine__remove_thread(machine, parent);
		thread__put(parent);
		parent = machine__findnew_thread(machine, event->fork.ppid,
						 event->fork.ptid);
	}

	/* if a thread currently exists for the thread id remove it */
	if (thread != NULL) {
		machine__remove_thread(machine, thread);
		thread__put(thread);
	}

	thread = machine__findnew_thread(machine, event->fork.pid,
					 event->fork.tid);
	/*
	 * When synthesizing FORK events, we are trying to create thread
	 * objects for the already running tasks on the machine.
	 *
	 * Normally, for a kernel FORK event, we want to clone the parent's
	 * maps because that is what the kernel just did.
	 *
	 * But when synthesizing, this should not be done.  If we do, we end up
	 * with overlapping maps as we process the synthesized MMAP2 events that
	 * get delivered shortly thereafter.
	 *
	 * Use the FORK event misc flags in an internal way to signal this
	 * situation, so we can elide the map clone when appropriate.
	 */
	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
		do_maps_clone = false;

	if (thread == NULL || parent == NULL ||
	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
		err = -1;
	}
	thread__put(thread);
	thread__put(parent);

	return err;
}

int machine__process_exit_event(struct machine *machine, union perf_event *event,
				struct perf_sample *sample __maybe_unused)
{
	struct thread *thread = machine__find_thread(machine,
						     event->fork.pid,
						     event->fork.tid);

	if (dump_trace)
		perf_event__fprintf_task(event, stdout);

	if (thread != NULL) {
		thread__exited(thread);
		thread__put(thread);
	}

	return 0;
}

int machine__process_event(struct machine *machine, union perf_event *event,
			   struct perf_sample *sample)
{
	int ret;

	switch (event->header.type) {
	case PERF_RECORD_COMM:
		ret = machine__process_comm_event(machine, event, sample); break;
	case PERF_RECORD_MMAP:
		ret = machine__process_mmap_event(machine, event, sample); break;
	case PERF_RECORD_NAMESPACES:
		ret = machine__process_namespaces_event(machine, event, sample); break;
	case PERF_RECORD_MMAP2:
		ret = machine__process_mmap2_event(machine, event, sample); break;
	case PERF_RECORD_FORK:
		ret = machine__process_fork_event(machine, event, sample); break;
	case PERF_RECORD_EXIT:
		ret = machine__process_exit_event(machine, event, sample); break;
	case PERF_RECORD_LOST:
		ret = machine__process_lost_event(machine, event, sample); break;
	case PERF_RECORD_AUX:
		ret = machine__process_aux_event(machine, event); break;
	case PERF_RECORD_ITRACE_START:
		ret = machine__process_itrace_start_event(machine, event); break;
	case PERF_RECORD_LOST_SAMPLES:
		ret = machine__process_lost_samples_event(machine, event, sample); break;
	case PERF_RECORD_SWITCH:
	case PERF_RECORD_SWITCH_CPU_WIDE:
		ret = machine__process_switch_event(machine, event); break;
	case PERF_RECORD_KSYMBOL:
		ret = machine__process_ksymbol(machine, event, sample); break;
	case PERF_RECORD_BPF_EVENT:
		ret = machine__process_bpf(machine, event, sample); break;
	default:
		ret = -1;
		break;
	}

	return ret;
}

static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
{
	if (!regexec(regex, sym->name, 0, NULL, 0))
		return 1;
	return 0;
}

static void ip__resolve_ams(struct thread *thread,
			    struct addr_map_symbol *ams,
			    u64 ip)
{
	struct addr_location al;

	memset(&al, 0, sizeof(al));
	/*
	 * We cannot use the header.misc hint to determine whether a
	 * branch stack address is user, kernel, guest, hypervisor.
	 * Branches may straddle the kernel/user/hypervisor boundaries.
	 * Thus, we have to try consecutively until we find a match
	 * or else, the symbol is unknown
	 */
	thread__find_cpumode_addr_location(thread, ip, &al);

	ams->addr = ip;
	ams->al_addr = al.addr;
	ams->ms.maps = al.maps;
	ams->ms.sym = al.sym;
	ams->ms.map = al.map;
	ams->phys_addr = 0;
}

static void ip__resolve_data(struct thread *thread,
			     u8 m, struct addr_map_symbol *ams,
			     u64 addr, u64 phys_addr)
{
	struct addr_location al;

	memset(&al, 0, sizeof(al));

	thread__find_symbol(thread, m, addr, &al);

	ams->addr = addr;
	ams->al_addr = al.addr;
	ams->ms.maps = al.maps;
	ams->ms.sym = al.sym;
	ams->ms.map = al.map;
	ams->phys_addr = phys_addr;
}

struct mem_info *sample__resolve_mem(struct perf_sample *sample,
				     struct addr_location *al)
{
	struct mem_info *mi = mem_info__new();

	if (!mi)
		return NULL;

	ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
	ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
			 sample->addr, sample->phys_addr);
	mi->data_src.val = sample->data_src;

	return mi;
}

1975
static char *callchain_srcline(struct map_symbol *ms, u64 ip)
1976
{
1977
	struct map *map = ms->map;
1978 1979
	char *srcline = NULL;

1980
	if (!map || callchain_param.key == CCKEY_FUNCTION)
1981 1982 1983 1984 1985 1986 1987 1988
		return srcline;

	srcline = srcline__tree_find(&map->dso->srclines, ip);
	if (!srcline) {
		bool show_sym = false;
		bool show_addr = callchain_param.key == CCKEY_ADDRESS;

		srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
				      ms->sym, show_sym, show_addr, ip);
		srcline__tree_insert(&map->dso->srclines, ip, srcline);
	}

	return srcline;
}

struct iterations {
	int nr_loop_iter;
	u64 cycles;
};

static int add_callchain_ip(struct thread *thread,
			    struct callchain_cursor *cursor,
			    struct symbol **parent,
			    struct addr_location *root_al,
			    u8 *cpumode,
			    u64 ip,
			    bool branch,
			    struct branch_flags *flags,
			    struct iterations *iter,
			    u64 branch_from)
{
	struct map_symbol ms;
	struct addr_location al;
	int nr_loop_iter = 0;
	u64 iter_cycles = 0;
	const char *srcline = NULL;

	al.filtered = 0;
	al.sym = NULL;
	if (!cpumode) {
		thread__find_cpumode_addr_location(thread, ip, &al);
	} else {
		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				*cpumode = PERF_RECORD_MISC_HYPERVISOR;
				break;
			case PERF_CONTEXT_KERNEL:
				*cpumode = PERF_RECORD_MISC_KERNEL;
				break;
			case PERF_CONTEXT_USER:
				*cpumode = PERF_RECORD_MISC_USER;
				break;
			default:
				pr_debug("invalid callchain context: "
					 "%"PRId64"\n", (s64) ip);
				/*
				 * It seems the callchain is corrupted.
				 * Discard all.
				 */
				callchain_cursor_reset(cursor);
				return 1;
			}
			return 0;
		}
		thread__find_symbol(thread, *cpumode, ip, &al);
	}

	if (al.sym != NULL) {
		if (perf_hpp_list.parent && !*parent &&
		    symbol__match_regex(al.sym, &parent_regex))
			*parent = al.sym;
		else if (have_ignore_callees && root_al &&
		  symbol__match_regex(al.sym, &ignore_callees_regex)) {
			/*
			 * Treat this symbol as the root,
			 * forgetting its callees.
			 */
			*root_al = al;
			callchain_cursor_reset(cursor);
		}
	}

	if (symbol_conf.hide_unresolved && al.sym == NULL)
		return 0;

	if (iter) {
		nr_loop_iter = iter->nr_loop_iter;
		iter_cycles = iter->cycles;
	}

	ms.maps = al.maps;
	ms.map = al.map;
	ms.sym = al.sym;
	srcline = callchain_srcline(&ms, al.addr);
	return callchain_cursor_append(cursor, ip, &ms,
				       branch, flags, nr_loop_iter,
				       iter_cycles, branch_from, srcline);
}

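/*
 * Resolve every entry of the sample's branch stack into from/to
 * map/symbol pairs on the sampled thread; the returned array has one
 * branch_info per branch stack entry and is owned by the caller.
 */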
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
					   struct addr_location *al)
{
	unsigned int i;
	const struct branch_stack *bs = sample->branch_stack;
	struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));

	if (!bi)
		return NULL;

	for (i = 0; i < bs->nr; i++) {
		ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
		ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
		bi[i].flags = bs->entries[i].flags;
	}
	return bi;
}

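/*
 * Account one collapsed loop iteration: bump the iteration counter and
 * record the total cycles of the 'nr' branch entries that form the
 * loop body.
 */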
static void save_iterations(struct iterations *iter,
			    struct branch_entry *be, int nr)
{
	int i;

	iter->nr_loop_iter++;
	iter->cycles = 0;

	for (i = 0; i < nr; i++)
		iter->cycles += be[i].flags.cycles;
}

#define CHASHSZ 127
#define CHASHBITS 7
#define NO_ENTRY 0xff

#define PERF_MAX_BRANCH_DEPTH 127

/*
 * Remove loops: each branch 'from' address is hashed into chash; when
 * the same address shows up again, the candidate loop body is compared
 * entry by entry, and a confirmed repetition is dropped, with its
 * iteration count and cycles recorded in 'iter'.
 */
static int remove_loops(struct branch_entry *l, int nr,
			struct iterations *iter)
{
	int i, j, off;
	unsigned char chash[CHASHSZ];

	memset(chash, NO_ENTRY, sizeof(chash));

	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);

	for (i = 0; i < nr; i++) {
		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;

		/* no collision handling for now */
		if (chash[h] == NO_ENTRY) {
			chash[h] = i;
		} else if (l[chash[h]].from == l[i].from) {
			bool is_loop = true;
			/* check if it is a real loop */
			off = 0;
			for (j = chash[h]; j < i && i + off < nr; j++, off++)
				if (l[j].from != l[i + off].from) {
					is_loop = false;
					break;
				}
			if (is_loop) {
				j = nr - (i + off);
				if (j > 0) {
					save_iterations(iter + i + off,
						l + i, off);

					memmove(iter + i, iter + i + off,
						j * sizeof(*iter));

					memmove(l + i, l + i + off,
						j * sizeof(*l));
				}

				nr -= off;
			}
		}
	}
	return nr;
}

/*
 * Resolve LBR callstack chain sample
 * Return:
 * 1 on success: LBR callchain information was resolved
 * 0 when no LBR callchain information is available; fall back to fp
 * negative error code on other errors.
 */
static int resolve_lbr_callchain_sample(struct thread *thread,
					struct callchain_cursor *cursor,
					struct perf_sample *sample,
					struct symbol **parent,
					struct addr_location *root_al,
					int max_stack)
{
	struct ip_callchain *chain = sample->callchain;
	int chain_nr = min(max_stack, (int)chain->nr), i;
	u8 cpumode = PERF_RECORD_MISC_USER;
	u64 ip, branch_from = 0;

	for (i = 0; i < chain_nr; i++) {
		if (chain->ips[i] == PERF_CONTEXT_USER)
			break;
	}

	/* LBR only affects the user callchain */
	if (i != chain_nr) {
		struct branch_stack *lbr_stack = sample->branch_stack;
		int lbr_nr = lbr_stack->nr, j, k;
		bool branch;
		struct branch_flags *flags;
		/*
		 * LBR callstack can only get user call chain.
		 * The mix_chain_nr is kernel call chain
		 * number plus LBR user call chain number.
		 * i is kernel call chain number,
		 * 1 is PERF_CONTEXT_USER,
		 * lbr_nr + 1 is the user call chain number.
		 * For details, please refer to the comments
		 * in callchain__printf
		 */
		int mix_chain_nr = i + 1 + lbr_nr + 1;
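		/*
		 * e.g. i = 2 kernel entries and lbr_nr = 3 LBR entries
		 * give mix_chain_nr = 2 + 1 + 3 + 1 = 7 positions to walk.
		 */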

		for (j = 0; j < mix_chain_nr; j++) {
			int err;
			branch = false;
			flags = NULL;

			if (callchain_param.order == ORDER_CALLEE) {
				if (j < i + 1)
					ip = chain->ips[j];
				else if (j > i + 1) {
					k = j - i - 2;
					ip = lbr_stack->entries[k].from;
					branch = true;
					flags = &lbr_stack->entries[k].flags;
				} else {
					ip = lbr_stack->entries[0].to;
					branch = true;
					flags = &lbr_stack->entries[0].flags;
					branch_from =
						lbr_stack->entries[0].from;
				}
			} else {
				if (j < lbr_nr) {
					k = lbr_nr - j - 1;
					ip = lbr_stack->entries[k].from;
					branch = true;
					flags = &lbr_stack->entries[k].flags;
				}
				else if (j > lbr_nr)
					ip = chain->ips[i + 1 - (j - lbr_nr)];
				else {
					ip = lbr_stack->entries[0].to;
					branch = true;
					flags = &lbr_stack->entries[0].flags;
					branch_from =
						lbr_stack->entries[0].from;
				}
			}

			err = add_callchain_ip(thread, cursor, parent,
					       root_al, &cpumode, ip,
					       branch, flags, NULL,
					       branch_from);
			if (err)
				return (err < 0) ? err : 0;
		}
		return 1;
	}

	return 0;
}

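/*
 * Scan the raw callchain downwards from index 'ent' for the closest
 * PERF_CONTEXT_* marker and pass it to add_callchain_ip(), which
 * updates *cpumode accordingly.
 */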
static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
			     struct callchain_cursor *cursor,
			     struct symbol **parent,
			     struct addr_location *root_al,
			     u8 *cpumode, int ent)
{
	int err = 0;

	while (--ent >= 0) {
		u64 ip = chain->ips[ent];

		if (ip >= PERF_CONTEXT_MAX) {
			err = add_callchain_ip(thread, cursor, parent,
					       root_al, cpumode, ip,
					       false, NULL, NULL, 0);
			break;
		}
	}
	return err;
}

static int thread__resolve_callchain_sample(struct thread *thread,
					    struct callchain_cursor *cursor,
					    struct evsel *evsel,
					    struct perf_sample *sample,
					    struct symbol **parent,
					    struct addr_location *root_al,
					    int max_stack)
{
	struct branch_stack *branch = sample->branch_stack;
	struct ip_callchain *chain = sample->callchain;
	int chain_nr = 0;
	u8 cpumode = PERF_RECORD_MISC_USER;
	int i, j, err, nr_entries;
	int skip_idx = -1;
	int first_call = 0;

	if (chain)
		chain_nr = chain->nr;

	if (perf_evsel__has_branch_callstack(evsel)) {
		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
						   root_al, max_stack);
		if (err)
			return (err < 0) ? err : 0;
	}

	/*
	 * Based on DWARF debug information, some architectures skip
	 * a callchain entry saved by the kernel.
	 */
	skip_idx = arch_skip_callchain_idx(thread, chain);

	/*
	 * Add branches to call stack for easier browsing. This gives
	 * more context for a sample than just the callers.
	 *
	 * This uses individual histograms of paths compared to the
	 * aggregated histograms the normal LBR mode uses.
	 *
	 * Limitations for now:
	 * - No extra filters
	 * - No annotations (should annotate somehow)
	 */

	if (branch && callchain_param.branch_callstack) {
		int nr = min(max_stack, (int)branch->nr);
		struct branch_entry be[nr];
		struct iterations iter[nr];

		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
			pr_warning("corrupted branch chain. skipping...\n");
			goto check_calls;
		}

		for (i = 0; i < nr; i++) {
			if (callchain_param.order == ORDER_CALLEE) {
				be[i] = branch->entries[i];

				if (chain == NULL)
					continue;

				/*
				 * Check for overlap into the callchain.
				 * The return address is one off compared to
				 * the branch entry. To adjust for this
				 * assume the calling instruction is not longer
				 * than 8 bytes.
				 */
				if (i == skip_idx ||
				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
					first_call++;
				else if (be[i].from < chain->ips[first_call] &&
				    be[i].from >= chain->ips[first_call] - 8)
					first_call++;
			} else
				be[i] = branch->entries[branch->nr - i - 1];
		}

		memset(iter, 0, sizeof(struct iterations) * nr);
		nr = remove_loops(be, nr, iter);

		for (i = 0; i < nr; i++) {
			err = add_callchain_ip(thread, cursor, parent,
					       root_al,
					       NULL, be[i].to,
					       true, &be[i].flags,
					       NULL, be[i].from);

			if (!err)
				err = add_callchain_ip(thread, cursor, parent, root_al,
						       NULL, be[i].from,
						       true, &be[i].flags,
						       &iter[i], 0);
			if (err == -EINVAL)
				break;
			if (err)
				return err;
		}

		if (chain_nr == 0)
			return 0;

		chain_nr -= nr;
	}

check_calls:
	if (chain && callchain_param.order != ORDER_CALLEE) {
		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
					&cpumode, chain->nr - first_call);
		if (err)
			return (err < 0) ? err : 0;
	}
	for (i = first_call, nr_entries = 0;
	     i < chain_nr && nr_entries < max_stack; i++) {
		u64 ip;

		if (callchain_param.order == ORDER_CALLEE)
			j = i;
		else
			j = chain->nr - i - 1;

#ifdef HAVE_SKIP_CALLCHAIN_IDX
		if (j == skip_idx)
			continue;
#endif
		ip = chain->ips[j];
		if (ip < PERF_CONTEXT_MAX)
			++nr_entries;
		else if (callchain_param.order != ORDER_CALLEE) {
			err = find_prev_cpumode(chain, thread, cursor, parent,
						root_al, &cpumode, j);
			if (err)
				return (err < 0) ? err : 0;
			continue;
		}

		err = add_callchain_ip(thread, cursor, parent,
				       root_al, &cpumode, ip,
				       false, NULL, NULL, 0);

		if (err)
			return (err < 0) ? err : 0;
	}

	return 0;
}

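/*
 * Expand 'ip' into its chain of inlined functions, cached in the DSO's
 * inlined_nodes tree, appending one cursor entry per inlined frame.
 * Returns 0 when the inline chain was appended; non-zero otherwise, in
 * which case the caller appends the plain frame itself.
 */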
static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip)
{
	struct symbol *sym = ms->sym;
	struct map *map = ms->map;
	struct inline_node *inline_node;
	struct inline_list *ilist;
	u64 addr;
	int ret = 1;

	if (!symbol_conf.inline_name || !map || !sym)
		return ret;

	addr = map__map_ip(map, ip);
	addr = map__rip_2objdump(map, addr);

	inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
	if (!inline_node) {
		inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
		if (!inline_node)
			return ret;
		inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
	}

	list_for_each_entry(ilist, &inline_node->val, list) {
		struct map_symbol ilist_ms = {
			.maps = ms->maps,
			.map = map,
			.sym = ilist->symbol,
		};
		ret = callchain_cursor_append(cursor, ip, &ilist_ms, false,
					      NULL, 0, 0, 0, ilist->srcline);

		if (ret != 0)
			return ret;
	}

	return ret;
}

static int unwind_entry(struct unwind_entry *entry, void *arg)
{
	struct callchain_cursor *cursor = arg;
	const char *srcline = NULL;
	u64 addr = entry->ip;

	if (symbol_conf.hide_unresolved && entry->ms.sym == NULL)
		return 0;

	if (append_inlines(cursor, &entry->ms, entry->ip) == 0)
		return 0;

	/*
	 * Convert entry->ip from a virtual address to an offset in
	 * its corresponding binary.
	 */
	if (entry->ms.map)
		addr = map__map_ip(entry->ms.map, entry->ip);

	srcline = callchain_srcline(&entry->ms, addr);
	return callchain_cursor_append(cursor, entry->ip, &entry->ms,
				       false, NULL, 0, 0, 0, srcline);
}

static int thread__resolve_callchain_unwind(struct thread *thread,
					    struct callchain_cursor *cursor,
					    struct evsel *evsel,
					    struct perf_sample *sample,
					    int max_stack)
{
	/* Can we do dwarf post unwind? */
	if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
	      (evsel->core.attr.sample_type & PERF_SAMPLE_STACK_USER)))
		return 0;

	/* Bail out if nothing was captured. */
	if ((!sample->user_regs.regs) ||
	    (!sample->user_stack.size))
		return 0;

	return unwind__get_entries(unwind_entry, cursor,
				   thread, sample, max_stack);
}

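/*
 * Resolve the complete callchain of a sample: the kernel-supplied
 * chain (plus LBR/branch data) via thread__resolve_callchain_sample()
 * and the DWARF post-unwound user stack via
 * thread__resolve_callchain_unwind().  The two parts are appended in
 * opposite order for callee vs. caller ordering so the cursor is built
 * in a consistent direction.
 */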
int thread__resolve_callchain(struct thread *thread,
			      struct callchain_cursor *cursor,
			      struct evsel *evsel,
			      struct perf_sample *sample,
			      struct symbol **parent,
			      struct addr_location *root_al,
			      int max_stack)
{
	int ret = 0;

	callchain_cursor_reset(cursor);

	if (callchain_param.order == ORDER_CALLEE) {
		ret = thread__resolve_callchain_sample(thread, cursor,
						       evsel, sample,
						       parent, root_al,
						       max_stack);
		if (ret)
			return ret;
		ret = thread__resolve_callchain_unwind(thread, cursor,
						       evsel, sample,
						       max_stack);
	} else {
		ret = thread__resolve_callchain_unwind(thread, cursor,
						       evsel, sample,
						       max_stack);
		if (ret)
			return ret;
		ret = thread__resolve_callchain_sample(thread, cursor,
						       evsel, sample,
						       parent, root_al,
						       max_stack);
	}

	return ret;
}

int machine__for_each_thread(struct machine *machine,
			     int (*fn)(struct thread *thread, void *p),
			     void *priv)
{
	struct threads *threads;
	struct rb_node *nd;
	struct thread *thread;
	int rc = 0;
	int i;

	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
		threads = &machine->threads[i];
		for (nd = rb_first_cached(&threads->entries); nd;
		     nd = rb_next(nd)) {
			thread = rb_entry(nd, struct thread, rb_node);
			rc = fn(thread, priv);
			if (rc != 0)
				return rc;
		}

		list_for_each_entry(thread, &threads->dead, node) {
			rc = fn(thread, priv);
			if (rc != 0)
				return rc;
		}
	}
	return rc;
}

int machines__for_each_thread(struct machines *machines,
			      int (*fn)(struct thread *thread, void *p),
			      void *priv)
{
	struct rb_node *nd;
	int rc = 0;

	rc = machine__for_each_thread(&machines->host, fn, priv);
	if (rc != 0)
		return rc;

	for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
		struct machine *machine = rb_entry(nd, struct machine, rb_node);

		rc = machine__for_each_thread(machine, fn, priv);
		if (rc != 0)
			return rc;
	}
	return rc;
}

pid_t machine__get_current_tid(struct machine *machine, int cpu)
{
	int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);

	if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid)
		return -1;

	return machine->current_tid[cpu];
}

int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
			     pid_t tid)
{
	struct thread *thread;
	int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS);

	if (cpu < 0)
		return -EINVAL;

	if (!machine->current_tid) {
		int i;

		machine->current_tid = calloc(nr_cpus, sizeof(pid_t));
		if (!machine->current_tid)
			return -ENOMEM;
		for (i = 0; i < nr_cpus; i++)
			machine->current_tid[i] = -1;
	}

	if (cpu >= nr_cpus) {
		pr_err("Requested CPU %d too large. ", cpu);
		pr_err("Consider raising MAX_NR_CPUS\n");
		return -EINVAL;
	}

	machine->current_tid[cpu] = tid;

	thread = machine__findnew_thread(machine, pid, tid);
	if (!thread)
		return -ENOMEM;

	thread->cpu = cpu;
	thread__put(thread);

	return 0;
}

/*
 * Compares the raw arch string. N.B. see instead perf_env__arch() if a
 * normalized arch is needed.
 */
bool machine__is(struct machine *machine, const char *arch)
{
	return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}

int machine__nr_cpus_avail(struct machine *machine)
{
	return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
}

int machine__get_kernel_start(struct machine *machine)
{
	struct map *map = machine__kernel_map(machine);
	int err = 0;

	/*
	 * The only addresses above 2^63 are kernel addresses of a 64-bit
	 * kernel.  Note that addresses are unsigned so that on a 32-bit system
	 * all addresses including kernel addresses are less than 2^32.  In
	 * that case (32-bit system), if the kernel mapping is unknown, all
	 * addresses will be assumed to be in user space - see
	 * machine__kernel_ip().
	 */
	machine->kernel_start = 1ULL << 63;
	if (map) {
		err = map__load(map);
		/*
		 * On x86_64, PTI entry trampolines are less than the
		 * start of kernel text, but still above 2^63. So leave
		 * kernel_start = 1ULL << 63 for x86_64.
		 */
		if (!err && !machine__is(machine, "x86_64"))
			machine->kernel_start = map->start;
	}
	return err;
}

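/*
 * On machines where kernel and user share a single address space, the
 * recorded cpumode cannot be trusted to classify an address, so
 * re-derive the kernel/user (or guest) classification from the address
 * itself via machine__kernel_ip().
 */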
u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
{
	u8 addr_cpumode = cpumode;
	bool kernel_ip;

	if (!machine->single_address_space)
		goto out;

	kernel_ip = machine__kernel_ip(machine, addr);
	switch (cpumode) {
	case PERF_RECORD_MISC_KERNEL:
	case PERF_RECORD_MISC_USER:
		addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
					   PERF_RECORD_MISC_USER;
		break;
	case PERF_RECORD_MISC_GUEST_KERNEL:
	case PERF_RECORD_MISC_GUEST_USER:
		addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
					   PERF_RECORD_MISC_GUEST_USER;
		break;
	default:
		break;
	}
out:
	return addr_cpumode;
}

struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id)
{
	return dsos__findnew_id(&machine->dsos, filename, id);
}

struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
{
	return machine__findnew_dso_id(machine, filename, NULL);
}

char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
{
	struct machine *machine = vmachine;
	struct map *map;
	struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);

	if (sym == NULL)
		return NULL;

	*modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL;
	*addrp = map->unmap_ip(map, sym->start);
	return sym->name;
}