diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5c2295b29f2c90504ced87871930d8950abe76ee..263e9e6bbd60912132ba18077c0fa0d66d7e1ffb 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -75,6 +75,14 @@ config PREEMPT_TRACER enabled. This option and the irqs-off timing option can be used together or separately.) +config SYSPROF_TRACER + bool "Sysprof Tracer" + depends on X86 + select TRACING + help + This tracer provides the trace needed by the 'Sysprof' userspace + tool. + config SCHED_TRACER bool "Scheduling Latency Tracer" depends on HAVE_FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c44a7dce9086a783f0245d16aad8726ba2bd3df4..71d17de172886b5841a246b2505555cd13d9a981 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o +obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o obj-$(CONFIG_FTRACE) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a875600733b13654d03251a254a51d0b4f2c248..e46de641ea44e03ff7646f070fe7f594f0596559 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2913,6 +2913,9 @@ static __init void tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'dyn_ftrace_total_info' entry\n"); #endif +#ifdef CONFIG_SYSPROF_TRACER + init_tracer_sysprof_debugfs(d_tracer); +#endif } static int trace_alloc_page(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4966e6a964fed8c038546bc0306790ada216b218..8cb215b239d50f8fa5b46966e9141d9ca8cdfd33 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -193,6 +193,8 @@ struct trace_iterator { void tracing_reset(struct trace_array_cpu *data); int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); +void init_tracer_sysprof_debugfs(struct dentry *d_tracer); + void ftrace(struct trace_array *tr, struct trace_array_cpu *data, unsigned long ip, @@ -294,6 +296,10 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace, extern int trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr); #endif +#ifdef CONFIG_SYSPROF_TRACER +extern int trace_selftest_startup_sysprof(struct tracer *trace, + struct trace_array *tr); +#endif #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 18c5423bc9777390c9442a5b461b5b67ba1d71f1..0911b7e073bf197b021ba77c75f6777a03910aa7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -538,3 +538,26 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr return ret; } #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + +#ifdef CONFIG_SYSPROF_TRACER +int +trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + tr->ctrl = 1; + trace->init(tr); + /* Sleep for a 1/10 of a second */ + msleep(100); + /* stop the tracing. */ + tr->ctrl = 0; + trace->ctrl_update(tr); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + + return ret; +} +#endif /* CONFIG_SYSPROF_TRACER */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c new file mode 100644 index 0000000000000000000000000000000000000000..2301e1e7c60648d7376f430b32e771257973be8a --- /dev/null +++ b/kernel/trace/trace_sysprof.c @@ -0,0 +1,363 @@ +/* + * trace stack traces + * + * Copyright (C) 2004-2008, Soeren Sandmann + * Copyright (C) 2007 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "trace.h" + +static struct trace_array *sysprof_trace; +static int __read_mostly tracer_enabled; + +/* + * 1 msec sample interval by default: + */ +static unsigned long sample_period = 1000000; +static const unsigned int sample_max_depth = 512; + +static DEFINE_MUTEX(sample_timer_lock); +/* + * Per CPU hrtimers that do the profiling: + */ +static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +struct backtrace_info { + struct trace_array_cpu *data; + struct trace_array *tr; + int pos; +}; + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + /* Don't bother with IRQ stacks for now */ + return -1; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct backtrace_info *info = data; + + if (info->pos < sample_max_depth && reliable) { + __trace_special(info->tr, info->data, 1, addr, 0); + + info->pos++; + } +} + +const static struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +static int +trace_kernel(struct pt_regs *regs, struct trace_array *tr, + struct trace_array_cpu *data) +{ + struct backtrace_info info; + unsigned long bp; + char *stack; + + info.tr = tr; + info.data = data; + info.pos = 1; + + __trace_special(info.tr, info.data, 1, regs->ip, 0); + + stack = ((char *)regs + sizeof(struct pt_regs)); +#ifdef CONFIG_FRAME_POINTER + bp = regs->bp; +#else + bp = 0; +#endif + + dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); + + return info.pos; +} + +static void timer_notify(struct pt_regs *regs, int cpu) +{ + struct trace_array_cpu *data; + struct stack_frame frame; + struct trace_array *tr; + const void __user *fp; + int is_user; + int i; + + if (!regs) + return; + + tr = sysprof_trace; + data = tr->data[cpu]; + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + __trace_special(tr, data, 0, 0, current->pid); + + if (!is_user) + i = trace_kernel(regs, tr, data); + else + i = 0; + + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm && i < sample_max_depth) { + regs = (struct pt_regs *)current->thread.sp0 - 1; + + fp = (void __user *)regs->bp; + + __trace_special(tr, data, 2, regs->ip, 0); + + while (i < sample_max_depth) { + frame.next_fp = 0; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + + __trace_special(tr, data, 2, frame.return_address, + (unsigned long)fp); + fp = frame.next_fp; + + i++; + } + + } + + /* + * Special trace entry if we overflow the max depth: + */ + if (i == sample_max_depth) + __trace_special(tr, data, -1, -1, -1); + + __trace_special(tr, data, 3, current->pid, i); +} + +static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) +{ + /* trace here */ + timer_notify(get_irq_regs(), smp_processor_id()); + + hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); + + return HRTIMER_RESTART; +} + +static void start_stack_timer(int cpu) +{ + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); + + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = stack_trace_timer_fn; + hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; + + hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); +} + +static void start_stack_timers(void) +{ + cpumask_t saved_mask = current->cpus_allowed; + int cpu; + + for_each_online_cpu(cpu) { + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + start_stack_timer(cpu); + } + set_cpus_allowed_ptr(current, &saved_mask); +} + +static void stop_stack_timer(int cpu) +{ + struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); + + hrtimer_cancel(hrtimer); +} + +static void stop_stack_timers(void) +{ + int cpu; + + for_each_online_cpu(cpu) + stop_stack_timer(cpu); +} + +static void stack_reset(struct trace_array *tr) +{ + int cpu; + + tr->time_start = ftrace_now(tr->cpu); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static void start_stack_trace(struct trace_array *tr) +{ + mutex_lock(&sample_timer_lock); + stack_reset(tr); + start_stack_timers(); + tracer_enabled = 1; + mutex_unlock(&sample_timer_lock); +} + +static void stop_stack_trace(struct trace_array *tr) +{ + mutex_lock(&sample_timer_lock); + stop_stack_timers(); + tracer_enabled = 0; + mutex_unlock(&sample_timer_lock); +} + +static void stack_trace_init(struct trace_array *tr) +{ + sysprof_trace = tr; + + if (tr->ctrl) + start_stack_trace(tr); +} + +static void stack_trace_reset(struct trace_array *tr) +{ + if (tr->ctrl) + stop_stack_trace(tr); +} + +static void stack_trace_ctrl_update(struct trace_array *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_stack_trace(tr); + else + stop_stack_trace(tr); +} + +static struct tracer stack_trace __read_mostly = +{ + .name = "sysprof", + .init = stack_trace_init, + .reset = stack_trace_reset, + .ctrl_update = stack_trace_ctrl_update, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_sysprof, +#endif +}; + +__init static int init_stack_trace(void) +{ + return register_tracer(&stack_trace); +} +device_initcall(init_stack_trace); + +#define MAX_LONG_DIGITS 22 + +static ssize_t +sysprof_sample_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + int r; + + r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +sysprof_sample_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[MAX_LONG_DIGITS]; + unsigned long val; + + if (cnt > MAX_LONG_DIGITS-1) + cnt = MAX_LONG_DIGITS-1; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + /* + * Enforce a minimum sample period of 100 usecs: + */ + if (val < 100) + val = 100; + + mutex_lock(&sample_timer_lock); + stop_stack_timers(); + sample_period = val * 1000; + start_stack_timers(); + mutex_unlock(&sample_timer_lock); + + return cnt; +} + +static struct file_operations sysprof_sample_fops = { + .read = sysprof_sample_read, + .write = sysprof_sample_write, +}; + +void init_tracer_sysprof_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + + entry = debugfs_create_file("sysprof_sample_period", 0644, + d_tracer, NULL, &sysprof_sample_fops); + if (entry) + return; + pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); +}