/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
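 *
 * Typical usage, assuming debugfs is mounted at /sys/kernel/debug:
 *
 *	echo wakeup > /sys/kernel/debug/tracing/current_tracer
 *	(or wakeup_rt to consider only RT tasks)
 *	cat /sys/kernel/debug/tracing/tracing_max_latency
 *
 * tracing_max_latency reports the worst wakeup-to-schedule-in latency
 * seen so far; the snapshot of that worst-case wakeup is kept in the
 * max trace buffer.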
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>

#include "trace.h"

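/*
 * Book-keeping for the wakeup being traced: the task we follow from
 * wakeup to schedule-in, the CPU it was woken on, the CPU it currently
 * runs on (kept up to date by the migrate_task probe below), and its
 * priority.  Updates are serialized by wakeup_lock.
 */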
static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static int			wakeup_current_cpu;
static unsigned			wakeup_prio = -1;
static int			wakeup_rt;

static arch_spinlock_t wakeup_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
static void wakeup_graph_return(struct ftrace_graph_ret *trace);

static int save_lat_flag;

#define TRACE_DISPLAY_GRAPH     1

static struct tracer_opt trace_opts[] = {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* display latency trace as call graph */
	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
#endif
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
	.val  = 0,
	.opts = trace_opts,
};

#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)

#ifdef CONFIG_FUNCTION_TRACER

/*
 * Prologue for the wakeup function tracers.
 *
 * Returns 1 if it is OK to continue, and preemption
 *            is disabled and data->disabled is incremented.
 *         0 if the trace is to be ignored, and preemption
 *            is not disabled and data->disabled is
 *            kept the same.
 *
 * Note, this function is also used outside this ifdef but
 *  inside the #ifdef of the function graph tracer below.
 *  This is OK, since the function graph tracer is
 *  dependent on the function tracer.
 */
static int
func_prolog_preempt_disable(struct trace_array *tr,
			    struct trace_array_cpu **data,
			    int *pc)
{
	long disabled;
	int cpu;

	if (likely(!wakeup_task))
		return 0;

	*pc = preempt_count();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	if (cpu != wakeup_current_cpu)
		goto out_enable;

	*data = tr->data[cpu];
	disabled = atomic_inc_return(&(*data)->disabled);
	if (unlikely(disabled != 1))
		goto out;

	return 1;

out:
	atomic_dec(&(*data)->disabled);
out_enable:
	preempt_enable_notrace();
	return 0;
}

/*
 * wakeup uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_irq_save(flags);
	trace_function(tr, ip, parent_ip, flags, pc);
	local_irq_restore(flags);

	atomic_dec(&data->disabled);
	preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
	.flags = FTRACE_OPS_FL_GLOBAL,
};
#endif /* CONFIG_FUNCTION_TRACER */

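/*
 * Start/stop either the plain function tracer or the function graph
 * tracer, depending on whether the display-graph option is set.
 */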
static int start_func_tracer(int graph)
{
	int ret;

	if (!graph)
		ret = register_ftrace_function(&trace_ops);
	else
		ret = register_ftrace_graph(&wakeup_graph_return,
					    &wakeup_graph_entry);

	if (!ret && tracing_is_enabled())
		tracer_enabled = 1;
	else
		tracer_enabled = 0;

	return ret;
}

static void stop_func_tracer(int graph)
{
	tracer_enabled = 0;

	if (!graph)
		unregister_ftrace_function(&trace_ops);
	else
		unregister_ftrace_graph();
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
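/*
 * Toggling display-graph requires a full restart: stop the current
 * function tracer, wipe the trace recorded so far, and start again
 * in the other mode.
 */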
static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{
	if (!(bit & TRACE_DISPLAY_GRAPH))
		return -EINVAL;

	if (!(is_graph() ^ set))
		return 0;

	stop_func_tracer(!set);

	wakeup_reset(wakeup_trace);
	tracing_max_latency = 0;

	return start_func_tracer(set);
}

static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc, ret = 0;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return 0;

	local_save_flags(flags);
	ret = __trace_graph_entry(tr, trace, flags, pc);
	atomic_dec(&data->disabled);
	preempt_enable_notrace();
	return ret;
}

static void wakeup_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_save_flags(flags);
	__trace_graph_return(tr, trace, flags, pc);
	atomic_dec(&data->disabled);

	preempt_enable_notrace();
}

static void wakeup_trace_open(struct trace_iterator *iter)
{
	if (is_graph())
		graph_trace_open(iter);
}

static void wakeup_trace_close(struct trace_iterator *iter)
{
	if (iter->private)
		graph_trace_close(iter);
}

#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
			    TRACE_GRAPH_PRINT_ABS_TIME | \
			    TRACE_GRAPH_PRINT_DURATION)

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	/*
	 * In graph mode call the graph tracer output function,
	 * otherwise go with the TRACE_FN event handler
	 */
	if (is_graph())
		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);

	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_print_header(struct seq_file *s)
{
	if (is_graph())
		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
	else
		trace_default_header(s);
}

static void
__trace_function(struct trace_array *tr,
		 unsigned long ip, unsigned long parent_ip,
		 unsigned long flags, int pc)
{
	if (is_graph())
		trace_graph_function(tr, ip, parent_ip, flags, pc);
	else
		trace_function(tr, ip, parent_ip, flags, pc);
}
#else
#define __trace_function trace_function

static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{
	return -EINVAL;
}

static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	return -1;
}

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_print_header(struct seq_file *s) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

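/*
 * Keep wakeup_current_cpu in step with the traced task as it migrates,
 * so that the function tracer above records only on the CPU the task
 * is actually running on.
 */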
static void
probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
{
	if (task != wakeup_task)
		return;

	wakeup_current_cpu = cpu;
}

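/*
 * Fires on every context switch.  Once the task we are waiting for is
 * scheduled in, compute the wakeup-to-schedule latency and, if it sets
 * a new maximum, snapshot the trace with update_max_tr().
 */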
static void notrace
probe_wakeup_sched_switch(void *ignore,
			  struct task_struct *prev, struct task_struct *next)
{
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	tracing_record_cmdline(prev);

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see the tracer_enabled = 1
	 * and the wakeup_task with an older task, that might
	 * actually be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	pc = preempt_count();

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (likely(disabled != 1))
		goto out;

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	/* The task we are waiting for is waking up */
	data = wakeup_trace->data[wakeup_cpu];

	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);

	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1-T0;

	if (!report_latency(delta))
		goto out_unlock;

	if (likely(!is_tracing_stopped())) {
		tracing_max_latency = delta;
		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
	}

out_unlock:
	__wakeup_reset(wakeup_trace);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

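/*
 * Drop the reference to the traced task and clear the tracked state.
 * The caller must hold wakeup_lock with interrupts disabled;
 * wakeup_reset() below is the locking wrapper.
 */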
static void __wakeup_reset(struct trace_array *tr)
{
	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	tracing_reset_online_cpus(tr);

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);
	__wakeup_reset(tr);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
}

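/*
 * Fires on sched_wakeup/sched_wakeup_new.  Begin tracing a new wakeup
 * if the woken task has a higher priority (numerically lower prio)
 * than both current and any wakeup already being traced.
 */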
static void
probe_wakeup(void *ignore, struct task_struct *p, int success)
{
	struct trace_array_cpu *data;
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;
	int pc;

	if (likely(!tracer_enabled))
		return;

	tracing_record_cmdline(p);
	tracing_record_cmdline(current);

	if ((wakeup_rt && !rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= current->prio)
		return;

	pc = preempt_count();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	arch_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(wakeup_trace);

	wakeup_cpu = task_cpu(p);
	wakeup_current_cpu = wakeup_cpu;
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	data = wakeup_trace->data[wakeup_cpu];
	data->preempt_timestamp = ftrace_now(cpu);
	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);

	/*
	 * We must be careful in using CALLER_ADDR2. But since wake_up
	 * is not called by an assembly function (whereas schedule is),
	 * it should be safe to use it here.
	 */
	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);

out_locked:
	arch_spin_unlock(&wakeup_lock);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint"
			" probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_migrate_task\n");
		goto fail_deprobe_sched_switch;
	}

	wakeup_reset(tr);

	/*
	 * Don't let the tracer_enabled = 1 show up before
	 * the wakeup_task is reset. This may be overkill since
	 * wakeup_reset does a spin_unlock after setting the
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	if (start_func_tracer(is_graph()))
		printk(KERN_ERR "failed to start wakeup tracer\n");
	return;
fail_deprobe_sched_switch:
	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	stop_func_tracer(is_graph());
	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
}

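/*
 * Common setup for the wakeup and wakeup_rt tracers: force the latency
 * trace format, clear the max latency and arm the probes.
 */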
static int __wakeup_tracer_init(struct trace_array *tr)
{
	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
	trace_flags |= TRACE_ITER_LATENCY_FMT;

	tracing_max_latency = 0;
	wakeup_trace = tr;
	start_wakeup_tracer(tr);
	return 0;
}

static int wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 0;
	return __wakeup_tracer_init(tr);
}

static int wakeup_rt_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 1;
	return __wakeup_tracer_init(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	stop_wakeup_tracer(tr);
	/* make sure we put back any tasks we are tracing */
	wakeup_reset(tr);

	if (!save_lat_flag)
		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
}

static void wakeup_tracer_start(struct trace_array *tr)
{
	wakeup_reset(tr);
	tracer_enabled = 1;
}

static void wakeup_tracer_stop(struct trace_array *tr)
{
	tracer_enabled = 0;
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

static struct tracer wakeup_rt_tracer __read_mostly =
{
	.name		= "wakeup_rt",
	.init		= wakeup_rt_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.wait_pipe	= poll_wait_pipe,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	ret = register_tracer(&wakeup_rt_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);