/*
 * Performance events callchain code, extracted from core.c:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
 *  Copyright    2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * For licensing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include "internal.h"

struct callchain_cpus_entries {
	struct rcu_head			rcu_head;
	struct perf_callchain_entry	*cpu_entries[0];
};

/* Maximum frames recorded per callchain; tunable via the perf_event_max_stack sysctl. */
int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;

/*
 * Bytes needed for one callchain entry: the fixed header followed by
 * up to sysctl_perf_event_max_stack 64-bit instruction pointers.
 */
static inline size_t perf_callchain_entry__sizeof(void)
{
	return sizeof(struct perf_callchain_entry) +
	       sysctl_perf_event_max_stack * sizeof(__u64);
}

/* Per-CPU recursion guard, one slot per context (task/softirq/hardirq/NMI). */
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
/* Number of events currently using callchain buffers. */
static atomic_t nr_callchain_events;
/* Serializes allocation and release of the shared buffers below. */
static DEFINE_MUTEX(callchain_mutex);
/* RCU-published buffer container; NULL until the first user allocates it. */
static struct callchain_cpus_entries *callchain_cpus_entries;


/*
 * Weak default: architectures with kernel-side unwind support override
 * this to fill @entry from @regs.
 */
__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
				  struct pt_regs *regs)
{
}

/*
 * Weak default: architectures with user-side unwind support override
 * this to fill @entry from the user register state in @regs.
 */
__weak void perf_callchain_user(struct perf_callchain_entry *entry,
				struct pt_regs *regs)
{
}

/*
 * RCU callback: free the per-CPU buffers and their container once a
 * grace period guarantees no reader still holds a reference.
 */
static void release_callchain_buffers_rcu(struct rcu_head *head)
{
	struct callchain_cpus_entries *entries =
		container_of(head, struct callchain_cpus_entries, rcu_head);
	int cpu;

	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);

	kfree(entries);
}

static void release_callchain_buffers(void)
{
	struct callchain_cpus_entries *entries;

	entries = callchain_cpus_entries;
63
	RCU_INIT_POINTER(callchain_cpus_entries, NULL);
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

static int alloc_callchain_buffers(void)
{
	int cpu;
	int size;
	struct callchain_cpus_entries *entries;

	/*
	 * We can't use the percpu allocation API for data that can be
	 * accessed from NMI. Use a temporary manual per cpu allocation
	 * until that gets sorted out.
	 */
	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);

	entries = kzalloc(size, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

84
	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
		if (!entries->cpu_entries[cpu])
			goto fail;
	}

	rcu_assign_pointer(callchain_cpus_entries, entries);

	return 0;

fail:
	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);
	kfree(entries);

	return -ENOMEM;
}

/*
 * Take a reference on the shared callchain buffers, allocating them
 * for the first user. Returns 0 on success or a negative errno; on
 * failure the reference is dropped again.
 */
int get_callchain_buffers(void)
{
	int count;
	int err = 0;

	mutex_lock(&callchain_mutex);

	count = atomic_inc_return(&nr_callchain_events);
	if (WARN_ON_ONCE(count < 1)) {
		err = -EINVAL;
		goto exit;
	}

	if (count > 1) {
		/* Buffers already exist unless the first allocation failed. */
		if (!callchain_cpus_entries)
			err = -ENOMEM;
		goto exit;
	}

	err = alloc_callchain_buffers();
exit:
	/* On any failure, give back the reference we just took. */
	if (err)
		atomic_dec(&nr_callchain_events);

	mutex_unlock(&callchain_mutex);

	return err;
}

/*
 * Drop a reference; the last user releases the buffers.
 * atomic_dec_and_mutex_lock() only takes callchain_mutex when the
 * count actually reaches zero.
 */
void put_callchain_buffers(void)
{
	if (!atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex))
		return;

	release_callchain_buffers();
	mutex_unlock(&callchain_mutex);
}

static struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
	int cpu;
	struct callchain_cpus_entries *entries;

148
	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
149 150 151 152 153 154 155 156 157
	if (*rctx == -1)
		return NULL;

	entries = rcu_dereference(callchain_cpus_entries);
	if (!entries)
		return NULL;

	cpu = smp_processor_id();

158 159
	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
160 161 162 163 164
}

static void
put_callchain_entry(int rctx)
{
165
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
166 167
}

struct perf_callchain_entry *
perf_callchain(struct perf_event *event, struct pt_regs *regs)
170
{
171 172 173 174
	bool kernel = !event->attr.exclude_callchain_kernel;
	bool user   = !event->attr.exclude_callchain_user;
	/* Disallow cross-task user callchains. */
	bool crosstask = event->ctx->task && event->ctx->task != current;
175 176 177

	if (!kernel && !user)
		return NULL;
178

179 180 181 182 183 184 185 186 187 188
	return get_perf_callchain(regs, 0, kernel, user, crosstask, true);
}

/*
 * Record a callchain into a per-CPU entry buffer.
 *
 * @regs:      register state to start unwinding from
 * @init_nr:   number of slots already consumed in the entry
 * @kernel:    capture the kernel-side chain
 * @user:      capture the user-side chain
 * @crosstask: @regs belongs to a task other than current; user-side
 *             unwinding is skipped in that case
 * @add_mark:  insert PERF_CONTEXT_{KERNEL,USER} markers into the chain
 *
 * Returns the filled entry, or NULL on recursion or when the buffers
 * are not allocated. The entry is only valid until this CPU reuses the
 * same recursion context.
 */
struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   bool crosstask, bool add_mark)
{
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);
	if (rctx == -1)
		return NULL;

	if (!entry)
		goto exit_put;

	entry->nr = init_nr;

	if (kernel && !user_mode(regs)) {
		if (add_mark)
			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
		perf_callchain_kernel(entry, regs);
	}

	if (user) {
		if (!user_mode(regs)) {
			/* Fall back to the task's user registers, if it has any. */
			regs = current->mm ? task_pt_regs(current) : NULL;
		}

		if (regs) {
			if (crosstask)
				goto exit_put;

			if (add_mark)
				perf_callchain_store(entry, PERF_CONTEXT_USER);
			perf_callchain_user(entry, regs);
		}
	}

exit_put:
	put_callchain_entry(rctx);

	return entry;
}

/*
 * sysctl handler for perf_event_max_stack: accept a new depth only
 * while no callchain users exist, since the per-CPU buffers are sized
 * from this value at allocation time. Returns -EBUSY otherwise.
 */
int perf_event_max_stack_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table stack_table = *table;
	int new_value = sysctl_perf_event_max_stack;
	int ret;

	/* Parse/print through a copy so a rejected write changes nothing. */
	stack_table.data = &new_value;
	ret = proc_dointvec_minmax(&stack_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		sysctl_perf_event_max_stack = new_value;
	mutex_unlock(&callchain_mutex);

	return ret;
}