/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
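/*
 * Example usage (a sketch, assuming debugfs is mounted at the usual
 * /sys/kernel/debug):
 *
 *	echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 *	echo wakeup > /sys/kernel/debug/tracing/current_tracer
 *	cat /sys/kernel/debug/tracing/trace
 *
 * The wakeup_rt variant registered below behaves the same, but only
 * considers realtime tasks.
 */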
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>

#include "trace.h"

static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static int			wakeup_current_cpu;
static unsigned			wakeup_prio = -1;
static int			wakeup_rt;

static arch_spinlock_t wakeup_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
static void wakeup_graph_return(struct ftrace_graph_ret *trace);

static int save_lat_flag;

#define TRACE_DISPLAY_GRAPH     1

static struct tracer_opt trace_opts[] = {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* display latency trace as call graph */
	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
#endif
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
	.val  = 0,
	.opts = trace_opts,
};

#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)

#ifdef CONFIG_FUNCTION_TRACER

/*
 * Prologue for the wakeup function tracers.
 *
 * Returns 1 if it is OK to continue, and preemption
 *            is disabled and data->disabled is incremented.
 *         0 if the trace is to be ignored, and preemption
 *            is not disabled and data->disabled is
 *            kept the same.
 *
 * Note, this function is also used outside this ifdef but
 *  inside the #ifdef of the function graph tracer below.
 *  This is OK, since the function graph tracer is
 *  dependent on the function tracer.
 */
static int
func_prolog_preempt_disable(struct trace_array *tr,
			    struct trace_array_cpu **data,
			    int *pc)
{
	long disabled;
	int cpu;

	if (likely(!wakeup_task))
		return 0;

	*pc = preempt_count();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	if (cpu != wakeup_current_cpu)
		goto out_enable;

	*data = tr->data[cpu];
	disabled = atomic_inc_return(&(*data)->disabled);
	if (unlikely(disabled != 1))
		goto out;

	return 1;

out:
	atomic_dec(&(*data)->disabled);

out_enable:
	preempt_enable_notrace();
	return 0;
}

/*
 * wakeup uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_irq_save(flags);
	trace_function(tr, ip, parent_ip, flags, pc);
	local_irq_restore(flags);

	atomic_dec(&data->disabled);
	preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
#endif /* CONFIG_FUNCTION_TRACER */

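/*
 * Register either the plain function tracer or the function graph
 * tracer as the engine driving the latency measurement, depending
 * on the display-graph option.
 */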
static int start_func_tracer(int graph)
{
	int ret;

	if (!graph)
		ret = register_ftrace_function(&trace_ops);
	else
		ret = register_ftrace_graph(&wakeup_graph_return,
					    &wakeup_graph_entry);

	if (!ret && tracing_is_enabled())
		tracer_enabled = 1;
	else
		tracer_enabled = 0;

	return ret;
}

static void stop_func_tracer(int graph)
{
	tracer_enabled = 0;

	if (!graph)
		unregister_ftrace_function(&trace_ops);
	else
		unregister_ftrace_graph();
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
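/*
 * Toggling the display-graph option switches the engine: stop the
 * old tracer, reset the trace and the max latency, and restart with
 * the other one.
 */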
static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{

	if (!(bit & TRACE_DISPLAY_GRAPH))
		return -EINVAL;

	if (!(is_graph() ^ set))
		return 0;

	stop_func_tracer(!set);

	wakeup_reset(wakeup_trace);
	tracing_max_latency = 0;

	return start_func_tracer(set);
}

static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc, ret = 0;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return 0;

	local_save_flags(flags);
	ret = __trace_graph_entry(tr, trace, flags, pc);
	atomic_dec(&data->disabled);
	preempt_enable_notrace();

	return ret;
}

static void wakeup_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_save_flags(flags);
	__trace_graph_return(tr, trace, flags, pc);
	atomic_dec(&data->disabled);

	preempt_enable_notrace();
	return;
}

static void wakeup_trace_open(struct trace_iterator *iter)
{
	if (is_graph())
		graph_trace_open(iter);
}

static void wakeup_trace_close(struct trace_iterator *iter)
{
	if (iter->private)
		graph_trace_close(iter);
}

#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	/*
	 * In graph mode call the graph tracer output function,
	 * otherwise go with the TRACE_FN event handler
	 */
	if (is_graph())
		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);

	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_print_header(struct seq_file *s)
{
	if (is_graph())
		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
	else
		trace_default_header(s);
}

static void
__trace_function(struct trace_array *tr,
		 unsigned long ip, unsigned long parent_ip,
		 unsigned long flags, int pc)
{
	if (is_graph())
		trace_graph_function(tr, ip, parent_ip, flags, pc);
	else
		trace_function(tr, ip, parent_ip, flags, pc);
}
#else
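/* Stubs for when the function graph tracer is not configured in. */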
#define __trace_function trace_function

static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{
	return -EINVAL;
}

static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	return -1;
}

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_print_header(struct seq_file *s) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

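/*
 * The task being traced may migrate; track its current CPU so that
 * func_prolog_preempt_disable() only traces the CPU the task is
 * actually running on.
 */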
static void
probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
{
	if (task != wakeup_task)
		return;

	wakeup_current_cpu = cpu;
}

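/*
 * Fires on every context switch.  Once the task we are waiting for
 * is scheduled in, compute the wakeup latency (now minus the
 * timestamp taken in probe_wakeup()) and record a new max latency
 * if it qualifies.
 */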
static void notrace
probe_wakeup_sched_switch(void *ignore,
			  struct task_struct *prev, struct task_struct *next)
{
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	tracing_record_cmdline(prev);

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see tracer_enabled = 1
	 * together with a stale wakeup_task that might actually
	 * be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	pc = preempt_count();

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (likely(disabled != 1))
		goto out;

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	/* The task we are waiting for is waking up */
	data = wakeup_trace->data[wakeup_cpu];

	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);

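	/* Wakeup latency: time of switch-in minus time of the wakeup */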
	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1-T0;

	if (!report_latency(delta))
		goto out_unlock;

	if (likely(!is_tracing_stopped())) {
		tracing_max_latency = delta;
		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
	}

out_unlock:
	__wakeup_reset(wakeup_trace);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

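/* Caller must hold wakeup_lock with interrupts disabled. */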
static void __wakeup_reset(struct trace_array *tr)
{
	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	tracing_reset_online_cpus(tr);

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);
	__wakeup_reset(tr);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
}

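/*
 * Fires on every wakeup.  Start a new trace only if the woken task
 * has a higher priority than both the task currently being traced
 * and the waker (and, for the wakeup_rt tracer, is a realtime task).
 */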
static void
probe_wakeup(void *ignore, struct task_struct *p, int success)
{
	struct trace_array_cpu *data;
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;
	int pc;

	if (likely(!tracer_enabled))
		return;

	tracing_record_cmdline(p);
	tracing_record_cmdline(current);

	if ((wakeup_rt && !rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= current->prio)
		return;

	pc = preempt_count();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	arch_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(wakeup_trace);

	wakeup_cpu = task_cpu(p);
	wakeup_current_cpu = wakeup_cpu;
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	data = wakeup_trace->data[wakeup_cpu];
	data->preempt_timestamp = ftrace_now(cpu);
	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);

	/*
	 * We must be careful in using CALLER_ADDR2. But since wake_up
	 * is not called by an assembly function (whereas schedule is),
	 * it should be safe to use it here.
	 */
	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);

out_locked:
	arch_spin_unlock(&wakeup_lock);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

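/* Hook up the scheduler tracepoints and start the function tracer. */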
static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint"
			" probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_migrate_task\n");
		return;
	}

	wakeup_reset(tr);

	/*
	 * Don't let tracer_enabled = 1 become visible before
	 * wakeup_task is reset. This may be overkill, since
	 * wakeup_reset() does a spin_unlock after setting
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	if (start_func_tracer(is_graph()))
		printk(KERN_ERR "failed to start wakeup tracer\n");

	return;
fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	stop_func_tracer(is_graph());
	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
}

static int __wakeup_tracer_init(struct trace_array *tr)
{
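	/* Force the latency output format; restored in wakeup_tracer_reset() */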
	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
	trace_flags |= TRACE_ITER_LATENCY_FMT;

	tracing_max_latency = 0;
	wakeup_trace = tr;
	start_wakeup_tracer(tr);
	return 0;
}

static int wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 0;
	return __wakeup_tracer_init(tr);
}

static int wakeup_rt_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 1;
	return __wakeup_tracer_init(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	stop_wakeup_tracer(tr);
	/* make sure we put back any tasks we are tracing */
	wakeup_reset(tr);

	if (!save_lat_flag)
		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
}

static void wakeup_tracer_start(struct trace_array *tr)
{
	wakeup_reset(tr);
	tracer_enabled = 1;
}

static void wakeup_tracer_stop(struct trace_array *tr)
{
	tracer_enabled = 0;
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

static struct tracer wakeup_rt_tracer __read_mostly =
{
	.name		= "wakeup_rt",
	.init		= wakeup_rt_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.wait_pipe	= poll_wait_pipe,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	ret = register_tracer(&wakeup_rt_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);