ftrace.c
/*
 * Infrastructure for profiling code inserted by 'gcc -pg'.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally ported from the -rt patch by:
 *   Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code in the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */

#include <linux/stop_machine.h>
#include <linux/clocksource.h>
#include <linux/kallsyms.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/hash.h>
#include <linux/list.h>

#include "trace.h"

int ftrace_enabled;
static int last_ftrace_enabled;

static DEFINE_SPINLOCK(ftrace_lock);
static DEFINE_MUTEX(ftrace_sysctl_lock);

static struct ftrace_ops ftrace_list_end __read_mostly =
{
	.func = ftrace_stub,
};

static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;

/* mcount is defined per arch in assembly */
EXPORT_SYMBOL(mcount);

notrace void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
{
	struct ftrace_ops *op = ftrace_list;

	/* in case someone actually ports this to alpha! */
	read_barrier_depends();

	while (op != &ftrace_list_end) {
		/* silly alpha */
		read_barrier_depends();
		op->func(ip, parent_ip);
		op = op->next;
	}
}

/**
 * clear_ftrace_function - reset the ftrace function
 *
 * This NULLs the ftrace function and in essence stops
 * tracing.  There may be a lag before other CPUs actually stop
 * calling into the old function.
 */
void clear_ftrace_function(void)
{
	ftrace_trace_function = ftrace_stub;
}

static int notrace __register_ftrace_function(struct ftrace_ops *ops)
{
	/* Should never be called by interrupts */
	spin_lock(&ftrace_lock);

	ops->next = ftrace_list;
	/*
	 * We are entering ops into the ftrace_list but another
	 * CPU might be walking that list. We need to make sure
	 * the ops->next pointer is valid before another CPU sees
	 * the ops pointer included into the ftrace_list.
	 */
	smp_wmb();
	ftrace_list = ops;

	if (ftrace_enabled) {
		/*
		 * For one func, simply call it directly.
		 * For more than one func, call the chain.
		 */
		if (ops->next == &ftrace_list_end)
			ftrace_trace_function = ops->func;
		else
			ftrace_trace_function = ftrace_list_func;
	}

	spin_unlock(&ftrace_lock);

	return 0;
}

static int notrace __unregister_ftrace_function(struct ftrace_ops *ops)
{
	struct ftrace_ops **p;
	int ret = 0;

	spin_lock(&ftrace_lock);

	/*
	 * If we are removing the last function, then simply point
	 * to the ftrace_stub.
	 */
	if (ftrace_list == ops && ops->next == &ftrace_list_end) {
		ftrace_trace_function = ftrace_stub;
		ftrace_list = &ftrace_list_end;
		goto out;
	}

	for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
		if (*p == ops)
			break;

	if (*p != ops) {
		ret = -1;
		goto out;
	}

	*p = (*p)->next;

	if (ftrace_enabled) {
		/* If we only have one func left, then call that directly */
		if (ftrace_list == &ftrace_list_end ||
		    ftrace_list->next == &ftrace_list_end)
			ftrace_trace_function = ftrace_list->func;
	}

 out:
	spin_unlock(&ftrace_lock);

	return ret;
}

#ifdef CONFIG_DYNAMIC_FTRACE

enum {
	FTRACE_ENABLE_CALLS		= (1 << 0),
	FTRACE_DISABLE_CALLS		= (1 << 1),
	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
	FTRACE_ENABLE_MCOUNT		= (1 << 3),
	FTRACE_DISABLE_MCOUNT		= (1 << 4),
};
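/*
 * The bits above are OR'd into a single command word and handed to
 * __ftrace_modify_code() through stop_machine_run() (see
 * ftrace_run_update_code() below), so all of the code patching runs
 * with every other CPU stopped.
 */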

static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];

static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);

static DEFINE_SPINLOCK(ftrace_shutdown_lock);
static DEFINE_MUTEX(ftraced_lock);

struct ftrace_page {
	struct ftrace_page	*next;
	int			index;
	struct dyn_ftrace	records[];
} __attribute__((packed));

#define ENTRIES_PER_PAGE \
  ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
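/*
 * Rough sizing example (illustrative only; the exact numbers depend on
 * the architecture and struct layout): if a struct dyn_ftrace record
 * occupies 32 bytes on a 64-bit kernel with 4 KB pages, roughly 127
 * records fit per page, so NR_TO_INIT below amounts to pre-allocating
 * on the order of 80 pages.
 */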

/* estimate from running different kernels */
#define NR_TO_INIT		10000

static struct ftrace_page	*ftrace_pages_start;
static struct ftrace_page	*ftrace_pages;

static int ftraced_trigger;
static int ftraced_suspend;

static int ftrace_record_suspend;

static inline int
notrace ftrace_ip_in_hash(unsigned long ip, unsigned long key)
{
	struct dyn_ftrace *p;
	struct hlist_node *t;
	int found = 0;

	hlist_for_each_entry(p, t, &ftrace_hash[key], node) {
		if (p->ip == ip) {
			found = 1;
			break;
		}
	}

	return found;
}

static inline void notrace
ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
{
	hlist_add_head(&node->node, &ftrace_hash[key]);
}

static notrace struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
		if (!ftrace_pages->next)
			return NULL;
		ftrace_pages = ftrace_pages->next;
	}

	return &ftrace_pages->records[ftrace_pages->index++];
}

static void notrace
ftrace_record_ip(unsigned long ip)
{
	struct dyn_ftrace *node;
	unsigned long flags;
	unsigned long key;
	int resched;
	int atomic;

	if (!ftrace_enabled)
		return;

	resched = need_resched();
	preempt_disable_notrace();

	/* We simply need to protect against recursion */
	__get_cpu_var(ftrace_shutdown_disable_cpu)++;
	if (__get_cpu_var(ftrace_shutdown_disable_cpu) != 1)
		goto out;

	if (unlikely(ftrace_record_suspend))
		goto out;

	key = hash_long(ip, FTRACE_HASHBITS);

	WARN_ON_ONCE(key >= FTRACE_HASHSIZE);

	if (ftrace_ip_in_hash(ip, key))
		goto out;

	atomic = irqs_disabled();

	spin_lock_irqsave(&ftrace_shutdown_lock, flags);

	/* This ip may have hit the hash before the lock */
	if (ftrace_ip_in_hash(ip, key))
		goto out_unlock;

	/*
	 * There's a slight race that the ftraced will update the
	 * hash and reset here. If it is already converted, skip it.
	 */
	if (ftrace_ip_converted(ip))
		goto out_unlock;

	node = ftrace_alloc_dyn_node(ip);
	if (!node)
		goto out_unlock;

	node->ip = ip;

	ftrace_add_hash(node, key);

	ftraced_trigger = 1;

 out_unlock:
	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
 out:
	__get_cpu_var(ftrace_shutdown_disable_cpu)--;

	/* prevent recursion with scheduler */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

#define FTRACE_ADDR ((long)(&ftrace_caller))
#define MCOUNT_ADDR ((long)(&mcount))

static void notrace ftrace_replace_code(int saved)
{
	unsigned char *new = NULL, *old = NULL;
	struct dyn_ftrace *rec;
	struct ftrace_page *pg;
	unsigned long ip;
	int failed;
	int i;

	if (saved)
		old = ftrace_nop_replace();
	else
		new = ftrace_nop_replace();
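	/*
	 * With @saved set, each recorded site is patched from a nop back
	 * to a call to ftrace_caller (tracing on); with it clear, the call
	 * is patched to a nop (tracing off).  Sites that previously
	 * faulted are skipped below.
	 */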

	for (pg = ftrace_pages_start; pg; pg = pg->next) {
		for (i = 0; i < pg->index; i++) {
			rec = &pg->records[i];

			/* don't modify code that has already faulted */
			if (rec->flags & FTRACE_FL_FAILED)
				continue;

			ip = rec->ip;

			if (saved)
				new = ftrace_call_replace(ip, FTRACE_ADDR);
			else
				old = ftrace_call_replace(ip, FTRACE_ADDR);

			failed = ftrace_modify_code(ip, old, new);
			if (failed)
				rec->flags |= FTRACE_FL_FAILED;
		}
	}
}

static notrace void ftrace_shutdown_replenish(void)
{
	if (ftrace_pages->next)
		return;

	/* allocate another page */
	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
}

static notrace void
ftrace_code_disable(struct dyn_ftrace *rec)
{
	unsigned long ip;
	unsigned char *nop, *call;
	int failed;

	ip = rec->ip;

	nop = ftrace_nop_replace();
	call = ftrace_call_replace(ip, MCOUNT_ADDR);

	failed = ftrace_modify_code(ip, call, nop);
	if (failed)
		rec->flags |= FTRACE_FL_FAILED;
}

347
static int notrace __ftrace_modify_code(void *data)
{
	unsigned long addr;
	int *command = data;

	if (*command & FTRACE_ENABLE_CALLS)
		ftrace_replace_code(1);
	else if (*command & FTRACE_DISABLE_CALLS)
		ftrace_replace_code(0);

	if (*command & FTRACE_UPDATE_TRACE_FUNC)
		ftrace_update_ftrace_func(ftrace_trace_function);

	if (*command & FTRACE_ENABLE_MCOUNT) {
		addr = (unsigned long)ftrace_record_ip;
		ftrace_mcount_set(&addr);
	} else if (*command & FTRACE_DISABLE_MCOUNT) {
		addr = (unsigned long)ftrace_stub;
		ftrace_mcount_set(&addr);
	}

	return 0;
}

static void notrace ftrace_run_update_code(int command)
{
	stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
}

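/*
 * Tracer function last pushed to the call sites via
 * FTRACE_UPDATE_TRACE_FUNC; ftrace_startup()/ftrace_shutdown() compare
 * against it so the update is only issued when ftrace_trace_function
 * has actually changed.
 */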
static ftrace_func_t saved_ftrace_func;

static void notrace ftrace_startup(void)
{
	int command = 0;

	mutex_lock(&ftraced_lock);
	ftraced_suspend++;
	if (ftraced_suspend == 1)
		command |= FTRACE_ENABLE_CALLS;

	if (saved_ftrace_func != ftrace_trace_function) {
		saved_ftrace_func = ftrace_trace_function;
		command |= FTRACE_UPDATE_TRACE_FUNC;
	}

	if (!command || !ftrace_enabled)
		goto out;

	ftrace_run_update_code(command);
 out:
	mutex_unlock(&ftraced_lock);
}

static void notrace ftrace_shutdown(void)
{
	int command = 0;

	mutex_lock(&ftraced_lock);
	ftraced_suspend--;
	if (!ftraced_suspend)
		command |= FTRACE_DISABLE_CALLS;

	if (saved_ftrace_func != ftrace_trace_function) {
		saved_ftrace_func = ftrace_trace_function;
		command |= FTRACE_UPDATE_TRACE_FUNC;
	}

	if (!command || !ftrace_enabled)
		goto out;

	ftrace_run_update_code(command);
 out:
	mutex_unlock(&ftraced_lock);
}

static void notrace ftrace_startup_sysctl(void)
{
	int command = FTRACE_ENABLE_MCOUNT;

	mutex_lock(&ftraced_lock);
	/* Force update next time */
	saved_ftrace_func = NULL;
	/* ftraced_suspend is true if we want ftrace running */
	if (ftraced_suspend)
		command |= FTRACE_ENABLE_CALLS;

	ftrace_run_update_code(command);
	mutex_unlock(&ftraced_lock);
}

static void notrace ftrace_shutdown_sysctl(void)
{
	int command = FTRACE_DISABLE_MCOUNT;

	mutex_lock(&ftraced_lock);
	/* ftraced_suspend is true if ftrace is running */
	if (ftraced_suspend)
		command |= FTRACE_DISABLE_CALLS;

	ftrace_run_update_code(command);
	mutex_unlock(&ftraced_lock);
}

static cycle_t		ftrace_update_time;
static unsigned long	ftrace_update_cnt;
unsigned long		ftrace_update_tot_cnt;

static int notrace __ftrace_update_code(void *ignore)
{
	struct dyn_ftrace *p;
	struct hlist_head head;
	struct hlist_node *t;
	int save_ftrace_enabled;
	cycle_t start, stop;
	int i;

	/* Don't be recording funcs now */
	save_ftrace_enabled = ftrace_enabled;
	ftrace_enabled = 0;

	start = now(raw_smp_processor_id());
	ftrace_update_cnt = 0;

	/* No locks needed, the machine is stopped! */
	for (i = 0; i < FTRACE_HASHSIZE; i++) {
		if (hlist_empty(&ftrace_hash[i]))
			continue;

		head = ftrace_hash[i];
		INIT_HLIST_HEAD(&ftrace_hash[i]);

		/* all CPUS are stopped, we are safe to modify code */
		hlist_for_each_entry(p, t, &head, node) {
			ftrace_code_disable(p);
			ftrace_update_cnt++;
		}

	}

	stop = now(raw_smp_processor_id());
	ftrace_update_time = stop - start;
	ftrace_update_tot_cnt += ftrace_update_cnt;

	ftrace_enabled = save_ftrace_enabled;

	return 0;
}

static void notrace ftrace_update_code(void)
{
	stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
}

static int notrace ftraced(void *ignore)
{
	unsigned long usecs;

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {

		/* check once a second */
		schedule_timeout(HZ);

		mutex_lock(&ftrace_sysctl_lock);
		mutex_lock(&ftraced_lock);
		if (ftrace_enabled && ftraced_trigger && !ftraced_suspend) {
			ftrace_record_suspend++;
			ftrace_update_code();
			usecs = nsecs_to_usecs(ftrace_update_time);
			if (ftrace_update_tot_cnt > 100000) {
				ftrace_update_tot_cnt = 0;
				pr_info("hm, dftrace overflow: %lu change%s"
					 " (%lu total) in %lu usec%s\n",
					ftrace_update_cnt,
					ftrace_update_cnt != 1 ? "s" : "",
					ftrace_update_tot_cnt,
					usecs, usecs != 1 ? "s" : "");
				WARN_ON_ONCE(1);
			}
			ftraced_trigger = 0;
			ftrace_record_suspend--;
		}
		mutex_unlock(&ftraced_lock);
		mutex_unlock(&ftrace_sysctl_lock);

		ftrace_shutdown_replenish();

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static int __init ftrace_dyn_table_alloc(void)
{
	struct ftrace_page *pg;
	int cnt;
	int i;

	/* allocate a few pages */
	ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
	if (!ftrace_pages_start)
		return -1;

	/*
	 * Allocate a few more pages.
	 *
	 * TODO: have some parser search vmlinux before
	 *   final linking to find all calls to ftrace.
	 *   Then we can:
	 *    a) know how many pages to allocate.
	 *     and/or
	 *    b) set up the table then.
	 *
	 *  The dynamic code is still necessary for
	 *  modules.
	 */

	pg = ftrace_pages = ftrace_pages_start;

	cnt = NR_TO_INIT / ENTRIES_PER_PAGE;

	for (i = 0; i < cnt; i++) {
		pg->next = (void *)get_zeroed_page(GFP_KERNEL);

		/* If we fail, we'll try later anyway */
		if (!pg->next)
			break;

		pg = pg->next;
	}

	return 0;
}

static int __init notrace ftrace_dynamic_init(void)
{
	struct task_struct *p;
	unsigned long addr;
	int ret;

	addr = (unsigned long)ftrace_record_ip;
	stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);

	/* ftrace_dyn_arch_init places the return code in addr */
	if (addr)
		return addr;

	ret = ftrace_dyn_table_alloc();
	if (ret)
		return ret;

	p = kthread_run(ftraced, NULL, "ftraced");
	if (IS_ERR(p))
		return -1;

	last_ftrace_enabled = ftrace_enabled = 1;

	return 0;
}

core_initcall(ftrace_dynamic_init);

#else
# define ftrace_startup()	  do { } while (0)
# define ftrace_shutdown()	  do { } while (0)
# define ftrace_startup_sysctl()  do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */

/**
 * register_ftrace_function - register a function for profiling
 * @ops - ops structure that holds the function for profiling.
 *
 * Register a function to be called by all functions in the
 * kernel.
 *
 * Note: @ops->func and all the functions it calls must be labeled
 *       with "notrace", otherwise it will go into a
 *       recursive loop.
 */
int register_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	mutex_lock(&ftrace_sysctl_lock);
	ret = __register_ftrace_function(ops);
	ftrace_startup();
	mutex_unlock(&ftrace_sysctl_lock);

	return ret;
}
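/*
 * Illustrative usage sketch (not part of this file; "my_trace_func" and
 * "my_ops" are made-up names): a caller supplies a notrace callback in
 * a struct ftrace_ops and brackets the region of interest with the
 * register/unregister pair:
 *
 *	static notrace void my_trace_func(unsigned long ip,
 *					  unsigned long parent_ip)
 *	{
 *		... record or count the call site; everything called from
 *		    here must itself be notrace ...
 *	}
 *
 *	static struct ftrace_ops my_ops __read_mostly = {
 *		.func	= my_trace_func,
 *	};
 *
 *	register_ftrace_function(&my_ops);
 *	...
 *	unregister_ftrace_function(&my_ops);
 */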

/**
 * unregister_ftrace_function - unregister a function for profiling.
 * @ops - ops structure that holds the function to unregister
 *
 * Unregister a function that was added to be called by ftrace profiling.
 */
int unregister_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	mutex_lock(&ftrace_sysctl_lock);
	ret = __unregister_ftrace_function(ops);
	ftrace_shutdown();
	mutex_unlock(&ftrace_sysctl_lock);

	return ret;
}

notrace int
ftrace_enable_sysctl(struct ctl_table *table, int write,
		     struct file *filp, void __user *buffer, size_t *lenp,
		     loff_t *ppos)
{
	int ret;

	mutex_lock(&ftrace_sysctl_lock);

	ret  = proc_dointvec(table, write, filp, buffer, lenp, ppos);

	if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
		goto out;

	last_ftrace_enabled = ftrace_enabled;

	if (ftrace_enabled) {

		ftrace_startup_sysctl();

		/* we are starting ftrace again */
		if (ftrace_list != &ftrace_list_end) {
			if (ftrace_list->next == &ftrace_list_end)
				ftrace_trace_function = ftrace_list->func;
			else
				ftrace_trace_function = ftrace_list_func;
		}

	} else {
		/* stopping ftrace calls (just send to ftrace_stub) */
		ftrace_trace_function = ftrace_stub;

		ftrace_shutdown_sysctl();
	}

 out:
	mutex_unlock(&ftrace_sysctl_lock);
	return ret;
}
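/*
 * The handler above typically backs an "ftrace_enabled" integer sysctl
 * (registered elsewhere, e.g. in kernel/sysctl.c), so function tracing
 * can be toggled from userspace roughly like:
 *
 *	echo 0 > /proc/sys/kernel/ftrace_enabled	# stop calling tracers
 *	echo 1 > /proc/sys/kernel/ftrace_enabled	# re-enable them
 */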