diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0456c3a51c6697c7eb8f49bbdf0f9faa74c78fb4..015a3d22cf7434deb500233623ae14b778d05e5b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,10 +153,6 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long counter; - struct hlist_node node; -#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 95e9ad5735d9c29aa62f5694c2927335198f275e..8a4136096d7d957f5d04feb3bbe615446b9aef2a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -379,20 +379,16 @@ config DYNAMIC_FTRACE config FUNCTION_PROFILER bool "Kernel function profiler" - depends on DYNAMIC_FTRACE + depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. When the dynamic - function tracing is enabled, a counter is added into the function - records used by the dynamic function tracer. A file is created in - debugfs called function_profile_enabled which defaults to zero. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. When a 1 is echoed into this file profiling begins, and when a zero is entered, profiling stops. A file in the trace_stats directory called functions, that show the list of functions that have been hit and their counters. - This takes up around 320K more memory. - If in doubt, say N config FTRACE_MCOUNT_RECORD diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 11f364c776d549676f273e75aeefab2360109def..24dac448cdc95974670466725e56b558baa1376a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void) #endif } -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; -static struct pid * const ftrace_swapper_pid = &init_struct_pid; - -#ifdef CONFIG_DYNAMIC_FTRACE - -#ifndef CONFIG_FTRACE_MCOUNT_RECORD -# error Dynamic ftrace depends on MCOUNT_RECORD -#endif - -static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; - -struct ftrace_func_probe { - struct hlist_node node; - struct ftrace_probe_ops *ops; - unsigned long flags; - unsigned long ip; - void *data; - struct rcu_head rcu; +#ifdef CONFIG_FUNCTION_PROFILER +struct ftrace_profile { + struct hlist_node node; + unsigned long ip; + unsigned long counter; }; -enum { - FTRACE_ENABLE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_ENABLE_MCOUNT = (1 << 3), - FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), +struct ftrace_profile_page { + struct ftrace_profile_page *next; + unsigned long index; + struct ftrace_profile records[]; }; -static int ftrace_filtered; +#define PROFILE_RECORDS_SIZE \ + (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) -static struct dyn_ftrace *ftrace_new_addrs; +#define PROFILES_PER_PAGE \ + (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static DEFINE_MUTEX(ftrace_regex_lock); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct dyn_ftrace records[]; -}; +/* TODO: make these percpu, to prevent cache line bouncing */ +static struct ftrace_profile_page *profile_pages_start; +static struct ftrace_profile_page *profile_pages; -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -static struct dyn_ftrace *ftrace_free_records; - -/* - * This is a double for. Do not use 'break' to break out of the loop, - * you must use a goto. - */ -#define do_for_each_ftrace_rec(pg, rec) \ - for (pg = ftrace_pages_start; pg; pg = pg->next) { \ - int _____i; \ - for (_____i = 0; _____i < pg->index; _____i++) { \ - rec = &pg->records[_____i]; - -#define while_for_each_ftrace_rec() \ - } \ - } - -#ifdef CONFIG_FUNCTION_PROFILER static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); +static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); + +#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +static raw_spinlock_t ftrace_profile_rec_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static void * function_stat_next(void *v, int idx) { - struct dyn_ftrace *rec = v; - struct ftrace_page *pg; + struct ftrace_profile *rec = v; + struct ftrace_profile_page *pg; - pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); again: rec++; @@ -330,27 +291,22 @@ function_stat_next(void *v, int idx) if (!pg) return NULL; rec = &pg->records[0]; + if (!rec->counter) + goto again; } - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || - !(rec->flags & FTRACE_FL_CONVERTED) || - /* ignore non hit functions */ - !rec->counter) - goto again; - return rec; } static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&ftrace_pages_start->records[0], 0); + return function_stat_next(&profile_pages_start->records[0], 0); } static int function_stat_cmp(void *p1, void *p2) { - struct dyn_ftrace *a = p1; - struct dyn_ftrace *b = p2; + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m) static int function_stat_show(struct seq_file *m, void *v) { - struct dyn_ftrace *rec = v; + struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -387,115 +343,191 @@ static struct tracer_stat function_stats = { .stat_show = function_stat_show }; -static void ftrace_profile_init(int nr_funcs) +static void ftrace_profile_reset(void) { - unsigned long addr; - int order; - int size; + struct ftrace_profile_page *pg; - /* - * We are profiling all functions, lets make it 1/4th of the - * number of functions that are in core kernel. So we have to - * iterate 4 times. - */ - order = (sizeof(struct hlist_head) * nr_funcs) / 4; - order = get_order(order); - size = 1 << (PAGE_SHIFT + order); - - pr_info("Allocating %d KB for profiler hash\n", size >> 10); + pg = profile_pages = profile_pages_start; - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!addr) { - pr_warning("Could not allocate function profiler hash\n"); - return; + while (pg) { + memset(pg->records, 0, PROFILE_RECORDS_SIZE); + pg->index = 0; + pg = pg->next; } - ftrace_profile_hash = (void *)addr; + memset(ftrace_profile_hash, 0, + FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); +} - /* - * struct hlist_head should be a pointer of 4 or 8 bytes. - * And a simple bit manipulation can be done, but if for - * some reason struct hlist_head is not a mulitple of 2, - * then we play it safe, and simply count. This function - * is done once at boot up, so it is not that critical in - * performance. - */ +int ftrace_profile_pages_init(void) +{ + struct ftrace_profile_page *pg; + int i; - size--; - size /= sizeof(struct hlist_head); + /* If we already allocated, do nothing */ + if (profile_pages) + return 0; - for (; size; size >>= 1) - ftrace_profile_bits++; + profile_pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!profile_pages) + return -ENOMEM; - pr_info("Function profiler has %d hash buckets\n", - 1 << ftrace_profile_bits); + pg = profile_pages_start = profile_pages; - return; + /* allocate 10 more pages to start */ + for (i = 0; i < 10; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + /* + * We only care about allocating profile_pages, if + * we failed to allocate here, hopefully we will allocate + * later. + */ + if (!pg->next) + break; + pg = pg->next; + } + + return 0; } -static ssize_t -ftrace_profile_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int ftrace_profile_init(void) { - char buf[64]; - int r; + int size; - r = sprintf(buf, "%u\n", ftrace_profile_enabled); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} + if (ftrace_profile_hash) { + /* If the profile is already created, simply reset it */ + ftrace_profile_reset(); + return 0; + } -static void ftrace_profile_reset(void) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; + /* + * We are profiling all functions, but usually only a few thousand + * functions are hit. We'll make a hash of 1024 items. + */ + size = FTRACE_PROFILE_HASH_SIZE; - do_for_each_ftrace_rec(pg, rec) { - rec->counter = 0; - } while_for_each_ftrace_rec(); + ftrace_profile_hash = + kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + + if (!ftrace_profile_hash) + return -ENOMEM; + + size--; + + for (; size; size >>= 1) + ftrace_profile_bits++; + + /* Preallocate a few pages */ + if (ftrace_profile_pages_init() < 0) { + kfree(ftrace_profile_hash); + ftrace_profile_hash = NULL; + return -ENOMEM; + } + + return 0; } -static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +/* interrupts must be disabled */ +static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; struct hlist_head *hhd; struct hlist_node *n; - unsigned long flags; unsigned long key; - if (!ftrace_profile_hash) - return NULL; - key = hash_long(ip, ftrace_profile_bits); hhd = &ftrace_profile_hash[key]; if (hlist_empty(hhd)) return NULL; - local_irq_save(flags); hlist_for_each_entry_rcu(rec, n, hhd, node) { if (rec->ip == ip) - goto out; + return rec; + } + + return NULL; +} + +static void ftrace_add_profile(struct ftrace_profile *rec) +{ + unsigned long key; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +/* Interrupts must be disabled calling this */ +static struct ftrace_profile * +ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +{ + struct ftrace_profile *rec = NULL; + + /* prevent recursion */ + if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + goto out; + + __raw_spin_lock(&ftrace_profile_rec_lock); + + /* Try to always keep another page available */ + if (!profile_pages->next && alloc_safe) + profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + + /* + * Try to find the function again since another + * task on another CPU could have added it + */ + rec = ftrace_find_profiled_func(ip); + if (rec) + goto out_unlock; + + if (profile_pages->index == PROFILES_PER_PAGE) { + if (!profile_pages->next) + goto out_unlock; + profile_pages = profile_pages->next; } - rec = NULL; + + rec = &profile_pages->records[profile_pages->index++]; + rec->ip = ip; + ftrace_add_profile(rec); + + out_unlock: + __raw_spin_unlock(&ftrace_profile_rec_lock); out: - local_irq_restore(flags); + atomic_dec(&__get_cpu_var(ftrace_profile_disable)); return rec; } +/* + * If we are not in an interrupt, or softirq and + * and interrupts are disabled and preemption is not enabled + * (not in a spinlock) then it should be safe to allocate memory. + */ +static bool ftrace_safe_to_allocate(void) +{ + return !in_interrupt() && irqs_disabled() && !preempt_count(); +} + static void function_profile_call(unsigned long ip, unsigned long parent_ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; unsigned long flags; + bool alloc_safe; if (!ftrace_profile_enabled) return; + alloc_safe = ftrace_safe_to_allocate(); + local_irq_save(flags); rec = ftrace_find_profiled_func(ip); - if (!rec) - goto out; + if (!rec) { + rec = ftrace_profile_alloc(ip, alloc_safe); + if (!rec) + goto out; + } rec->counter++; out: @@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, char buf[64]; int ret; - if (!ftrace_profile_hash) { - pr_info("Can not enable hash due to earlier problems\n"); - return -ENODEV; - } - if (cnt >= sizeof(buf)) return -EINVAL; @@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, mutex_lock(&ftrace_profile_lock); if (ftrace_profile_enabled ^ val) { if (val) { - ftrace_profile_reset(); + ret = ftrace_profile_init(); + if (ret < 0) { + cnt = ret; + goto out; + } + register_ftrace_function(&ftrace_profile_ops); ftrace_profile_enabled = 1; } else { @@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, unregister_ftrace_function(&ftrace_profile_ops); } } + out: mutex_unlock(&ftrace_profile_lock); filp->f_pos += cnt; @@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct file_operations ftrace_profile_fops = { .open = tracing_open_generic, .read = ftrace_profile_read, @@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) "'function_profile_enabled' entry\n"); } -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ - unsigned long key; - - if (!ftrace_profile_hash) - return; - - key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); -} - -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ - mutex_lock(&ftrace_profile_lock); - hlist_del(&rec->node); - mutex_unlock(&ftrace_profile_lock); -} - #else /* CONFIG_FUNCTION_PROFILER */ -static void ftrace_profile_init(int nr_funcs) -{ -} -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ -} static void ftrace_profile_debugfs(struct dentry *d_tracer) { } -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ -} #endif /* CONFIG_FUNCTION_PROFILER */ +/* set when tracing only a pid */ +struct pid *ftrace_pid_trace; +static struct pid * const ftrace_swapper_pid = &init_struct_pid; + +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifndef CONFIG_FTRACE_MCOUNT_RECORD +# error Dynamic ftrace depends on MCOUNT_RECORD +#endif + +static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +struct ftrace_func_probe { + struct hlist_node node; + struct ftrace_probe_ops *ops; + unsigned long flags; + unsigned long ip; + void *data; + struct rcu_head rcu; +}; + +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), +}; + +static int ftrace_filtered; + +static struct dyn_ftrace *ftrace_new_addrs; + +static DEFINE_MUTEX(ftrace_regex_lock); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct dyn_ftrace records[]; +}; + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +static struct dyn_ftrace *ftrace_free_records; + +/* + * This is a double for. Do not use 'break' to break out of the loop, + * you must use a goto. + */ +#define do_for_each_ftrace_rec(pg, rec) \ + for (pg = ftrace_pages_start; pg; pg = pg->next) { \ + int _____i; \ + for (_____i = 0; _____i < pg->index; _____i++) { \ + rec = &pg->records[_____i]; + +#define while_for_each_ftrace_rec() \ + } \ + } + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) { + !(rec->flags & FTRACE_FL_FREE)) ftrace_free_rec(rec); - ftrace_profile_release(rec); - } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; - ftrace_add_profile(rec); - return rec; } @@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - ftrace_profile_debugfs(d_tracer); - return 0; } @@ -2532,8 +2611,6 @@ void __init ftrace_init(void) if (ret) goto failed; - ftrace_profile_init(count); - last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, @@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_pid' entry\n"); + + ftrace_profile_debugfs(d_tracer); + return 0; } fs_initcall(ftrace_init_debugfs);