/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
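
/*
 * Usage sketch (assuming the standard tracing debugfs interface,
 * typically mounted at /sys/kernel/debug/tracing):
 *
 *   echo wakeup > current_tracer      # measure wakeup latency (all tasks)
 *   echo wakeup_rt > current_tracer   # measure wakeup latency (RT tasks only)
 *   cat trace                         # show the worst latency recorded
 */
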
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <trace/events/sched.h>

#include "trace.h"

static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static int			wakeup_current_cpu;
static unsigned			wakeup_prio = -1;
static int			wakeup_rt;

static arch_spinlock_t wakeup_lock =
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
static void wakeup_graph_return(struct ftrace_graph_ret *trace);

static int save_lat_flag;

#define TRACE_DISPLAY_GRAPH     1

static struct tracer_opt trace_opts[] = {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/* display latency trace as call graph */
	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
#endif
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
	.val  = 0,
	.opts = trace_opts,
};

#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)

#ifdef CONFIG_FUNCTION_TRACER

/*
 * Prologue for the wakeup function tracers.
 *
 * Returns 1 if it is OK to continue, and preemption
 *            is disabled and data->disabled is incremented.
 *         0 if the trace is to be ignored, and preemption
 *            is not disabled and data->disabled is
 *            kept the same.
 *
 * Note, this function is also used outside this ifdef but
 *  inside the #ifdef of the function graph tracer below.
 *  This is OK, since the function graph tracer is
 *  dependent on the function tracer.
 */
static int
func_prolog_preempt_disable(struct trace_array *tr,
			    struct trace_array_cpu **data,
			    int *pc)
{
	long disabled;
	int cpu;

	if (likely(!wakeup_task))
		return 0;

	*pc = preempt_count();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	if (cpu != wakeup_current_cpu)
		goto out_enable;

	*data = tr->data[cpu];
	disabled = atomic_inc_return(&(*data)->disabled);
	if (unlikely(disabled != 1))
		goto out;

	return 1;

out:
	atomic_dec(&(*data)->disabled);

out_enable:
	preempt_enable_notrace();
	return 0;
}

/*
 * wakeup uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_irq_save(flags);
	trace_function(tr, ip, parent_ip, flags, pc);
	local_irq_restore(flags);

	atomic_dec(&data->disabled);
	preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
	.flags = FTRACE_OPS_FL_GLOBAL,
};
#endif /* CONFIG_FUNCTION_TRACER */

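/*
 * Register the wakeup tracer's callback with either the plain function
 * tracer or the function graph tracer, depending on @graph, and flag
 * the tracer enabled only if registration (and global tracing) is up.
 */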
static int start_func_tracer(int graph)
{
	int ret;

	if (!graph)
		ret = register_ftrace_function(&trace_ops);
	else
		ret = register_ftrace_graph(&wakeup_graph_return,
					    &wakeup_graph_entry);

	if (!ret && tracing_is_enabled())
		tracer_enabled = 1;
	else
		tracer_enabled = 0;

	return ret;
}

static void stop_func_tracer(int graph)
{
	tracer_enabled = 0;

	if (!graph)
		unregister_ftrace_function(&trace_ops);
	else
		unregister_ftrace_graph();
}

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
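/*
 * Toggle the "display-graph" option: restart function tracing in the
 * requested mode and clear the measured maximum, since output from the
 * two modes cannot be meaningfully mixed in one latency snapshot.
 */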
static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{

	if (!(bit & TRACE_DISPLAY_GRAPH))
		return -EINVAL;

	if (!(is_graph() ^ set))
		return 0;

	stop_func_tracer(!set);

	wakeup_reset(wakeup_trace);
	tracing_max_latency = 0;

	return start_func_tracer(set);
}

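/*
 * Graph-entry callback: record function entries, but only on the CPU
 * where the task we are tracking is running (see the prologue above).
 */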
static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc, ret = 0;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return 0;

	local_save_flags(flags);
	ret = __trace_graph_entry(tr, trace, flags, pc);
	atomic_dec(&data->disabled);
	preempt_enable_notrace();
	return ret;
}

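/* Graph-return callback: the counterpart of wakeup_graph_entry(). */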
static void wakeup_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	int pc;

	if (!func_prolog_preempt_disable(tr, &data, &pc))
		return;

	local_save_flags(flags);
	__trace_graph_return(tr, trace, flags, pc);
	atomic_dec(&data->disabled);

	preempt_enable_notrace();
	return;
}

static void wakeup_trace_open(struct trace_iterator *iter)
{
	if (is_graph())
		graph_trace_open(iter);
}

static void wakeup_trace_close(struct trace_iterator *iter)
{
	if (iter->private)
		graph_trace_close(iter);
}

#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	/*
	 * In graph mode call the graph tracer output function,
	 * otherwise go with the TRACE_FN event handler
	 */
	if (is_graph())
		return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);

	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_print_header(struct seq_file *s)
{
	if (is_graph())
		print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
	else
		trace_default_header(s);
}

static void
__trace_function(struct trace_array *tr,
		 unsigned long ip, unsigned long parent_ip,
		 unsigned long flags, int pc)
{
	if (is_graph())
		trace_graph_function(tr, ip, parent_ip, flags, pc);
	else
		trace_function(tr, ip, parent_ip, flags, pc);
}
#else
#define __trace_function trace_function

static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
{
	return -EINVAL;
}

static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
{
	return -1;
}

static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
	return TRACE_TYPE_UNHANDLED;
}

static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
static void wakeup_print_header(struct seq_file *s) { }
static void wakeup_trace_open(struct trace_iterator *iter) { }
static void wakeup_trace_close(struct trace_iterator *iter) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

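/*
 * Follow the task we are tracing if it migrates to another CPU, so the
 * function tracing callbacks above keep checking against the right CPU.
 */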
static void
probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
{
	if (task != wakeup_task)
		return;

	wakeup_current_cpu = cpu;
}

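/*
 * sched_switch probe: when the task we are waiting for is scheduled in,
 * compute the wakeup-to-scheduled delta and record a new max latency
 * snapshot if it beats the previous one.
 */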
static void notrace
probe_wakeup_sched_switch(void *ignore,
			  struct task_struct *prev, struct task_struct *next)
{
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	tracing_record_cmdline(prev);

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see the tracer_enabled = 1
	 * and the wakeup_task with an older task, that might
	 * actually be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	pc = preempt_count();

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (likely(disabled != 1))
		goto out;

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	/* The task we are waiting for is waking up */
	data = wakeup_trace->data[wakeup_cpu];

	__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);

	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1-T0;

	if (!report_latency(delta))
		goto out_unlock;

	if (likely(!is_tracing_stopped())) {
		tracing_max_latency = delta;
		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
	}

out_unlock:
	__wakeup_reset(wakeup_trace);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

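/*
 * Clear the tracked-task state and drop our task_struct reference.
 * Callers are expected to hold wakeup_lock.
 */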
static void __wakeup_reset(struct trace_array *tr)
{
	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	tracing_reset_online_cpus(tr);

	local_irq_save(flags);
	arch_spin_lock(&wakeup_lock);
	__wakeup_reset(tr);
	arch_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);
}

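/*
 * sched_wakeup/sched_wakeup_new probe: start tracking @p if it has a
 * higher priority (lower prio value) than both the task currently
 * being tracked and the task doing the waking.
 */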
static void
probe_wakeup(void *ignore, struct task_struct *p, int success)
{
	struct trace_array_cpu *data;
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;
	int pc;

	if (likely(!tracer_enabled))
		return;

	tracing_record_cmdline(p);
	tracing_record_cmdline(current);

	if ((wakeup_rt && !rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= current->prio)
		return;

	pc = preempt_count();
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	arch_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(wakeup_trace);

	wakeup_cpu = task_cpu(p);
	wakeup_current_cpu = wakeup_cpu;
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	data = wakeup_trace->data[wakeup_cpu];
	data->preempt_timestamp = ftrace_now(cpu);
	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);

	/*
	 * We must be careful in using CALLER_ADDR2. But since wake_up
	 * is not called by an assembly function (whereas schedule is)
	 * it should be safe to use it here.
	 */
	__trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);

out_locked:
	arch_spin_unlock(&wakeup_lock);
out:
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

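/*
 * Hook up the sched_wakeup, sched_wakeup_new, sched_switch and
 * sched_migrate_task tracepoint probes, then start the function (or
 * function graph) tracer on top of them.
 */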
static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = register_trace_sched_wakeup(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	if (ret) {
		pr_info("sched trace: Couldn't activate tracepoint"
			" probe to kernel_sched_switch\n");
		goto fail_deprobe_wake_new;
	}

	ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
	if (ret) {
		pr_info("wakeup trace: Couldn't activate tracepoint"
			" probe to kernel_sched_migrate_task\n");
		return;
	}

	wakeup_reset(tr);

	/*
	 * Don't let the tracer_enabled = 1 show up before
	 * the wakeup_task is reset. This may be overkill since
	 * wakeup_reset does a spin_unlock after setting the
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	if (start_func_tracer(is_graph()))
		printk(KERN_ERR "failed to start wakeup tracer\n");

	return;
fail_deprobe_wake_new:
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
fail_deprobe:
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	stop_func_tracer(is_graph());
	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_wakeup, NULL);
	unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
}

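/*
 * Common setup for the "wakeup" and "wakeup_rt" tracers: force the
 * latency output format, clear the old maximum and arm the probes.
 */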
static int __wakeup_tracer_init(struct trace_array *tr)
{
	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
	trace_flags |= TRACE_ITER_LATENCY_FMT;

	tracing_max_latency = 0;
	wakeup_trace = tr;
	start_wakeup_tracer(tr);
	return 0;
}

static int wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 0;
	return __wakeup_tracer_init(tr);
}

static int wakeup_rt_tracer_init(struct trace_array *tr)
{
	wakeup_rt = 1;
	return __wakeup_tracer_init(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	stop_wakeup_tracer(tr);
	/* make sure we put back any tasks we are tracing */
	wakeup_reset(tr);

	if (!save_lat_flag)
		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
}

static void wakeup_tracer_start(struct trace_array *tr)
{
	wakeup_reset(tr);
	tracer_enabled = 1;
}

static void wakeup_tracer_stop(struct trace_array *tr)
{
	tracer_enabled = 0;
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

static struct tracer wakeup_rt_tracer __read_mostly =
{
	.name		= "wakeup_rt",
	.init		= wakeup_rt_tracer_init,
	.reset		= wakeup_tracer_reset,
	.start		= wakeup_tracer_start,
	.stop		= wakeup_tracer_stop,
	.wait_pipe	= poll_wait_pipe,
	.print_max	= 1,
	.print_header	= wakeup_print_header,
	.print_line	= wakeup_print_line,
	.flags		= &tracer_flags,
	.set_flag	= wakeup_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
	.open		= wakeup_trace_open,
	.close		= wakeup_trace_close,
	.use_max_tr	= 1,
};

__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	ret = register_tracer(&wakeup_rt_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);