tracepoint.c 14.5 KB
Newer Older
M
Mathieu Desnoyers 已提交
1
/*
2
 * Copyright (C) 2008-2014 Mathieu Desnoyers
M
Mathieu Desnoyers 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/static_key.h>
#include <linux/notifier.h>
M
Mathieu Desnoyers 已提交
29

30 31
/*
 * Start and end markers of the built-in tracepoint pointer array; they are
 * passed as the [begin, end) range by for_each_kernel_tracepoint().
 */
extern struct tracepoint * const __start___tracepoints_ptrs[];
extern struct tracepoint * const __stop___tracepoints_ptrs[];
M
Mathieu Desnoyers 已提交
32 33 34 35

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

36
#ifdef CONFIG_MODULES
M
Mathieu Desnoyers 已提交
37
/*
38
 * Tracepoint module list mutex protects the local module list.
M
Mathieu Desnoyers 已提交
39
 */
40
static DEFINE_MUTEX(tracepoint_module_list_mutex);
M
Mathieu Desnoyers 已提交
41

42
/* Local list of struct tp_module */
43 44 45
static LIST_HEAD(tracepoint_module_list);
#endif /* CONFIG_MODULES */

M
Mathieu Desnoyers 已提交
46
/*
47 48
 * tracepoints_mutex protects the builtin and module tracepoints.
 * tracepoints_mutex nests inside tracepoint_module_list_mutex.
M
Mathieu Desnoyers 已提交
49
 */
50
static DEFINE_MUTEX(tracepoints_mutex);
M
Mathieu Desnoyers 已提交
51 52 53

/*
 * Note about RCU :
54
 * It is used to delay the free of multiple probes array until a quiescent
M
Mathieu Desnoyers 已提交
55 56
 * state is reached.
 */
57
struct tp_probes {
58
	struct rcu_head rcu;
59
	struct tracepoint_func probes[0];
60
};
M
Mathieu Desnoyers 已提交
61

62
static inline void *allocate_probes(int count)
M
Mathieu Desnoyers 已提交
63
{
64
	struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
65 66
			+ sizeof(struct tp_probes), GFP_KERNEL);
	return p == NULL ? NULL : p->probes;
M
Mathieu Desnoyers 已提交
67 68
}

69
static void rcu_free_old_probes(struct rcu_head *head)
M
Mathieu Desnoyers 已提交
70
{
71
	kfree(container_of(head, struct tp_probes, rcu));
72 73
}

74
static inline void release_probes(struct tracepoint_func *old)
75 76 77 78
{
	if (old) {
		struct tp_probes *tp_probes = container_of(old,
			struct tp_probes, probes[0]);
79
		call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
80
	}
M
Mathieu Desnoyers 已提交
81 82
}

83
static void debug_print_probes(struct tracepoint_func *funcs)
M
Mathieu Desnoyers 已提交
84 85 86
{
	int i;

87
	if (!tracepoint_debug || !funcs)
M
Mathieu Desnoyers 已提交
88 89
		return;

90 91
	for (i = 0; funcs[i].func; i++)
		printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
M
Mathieu Desnoyers 已提交
92 93
}

94 95 96
static struct tracepoint_func *
func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
	 int prio)
M
Mathieu Desnoyers 已提交
97
{
98
	struct tracepoint_func *old, *new;
99 100
	int nr_probes = 0;
	int pos = -1;
M
Mathieu Desnoyers 已提交
101

102
	if (WARN_ON(!tp_func->func))
103
		return ERR_PTR(-EINVAL);
M
Mathieu Desnoyers 已提交
104

105 106
	debug_print_probes(*funcs);
	old = *funcs;
M
Mathieu Desnoyers 已提交
107 108
	if (old) {
		/* (N -> N+1), (N != 0, 1) probes */
109 110 111 112
		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
			/* Insert before probes of lower priority */
			if (pos < 0 && old[nr_probes].prio < prio)
				pos = nr_probes;
113 114
			if (old[nr_probes].func == tp_func->func &&
			    old[nr_probes].data == tp_func->data)
M
Mathieu Desnoyers 已提交
115
				return ERR_PTR(-EEXIST);
116
		}
M
Mathieu Desnoyers 已提交
117 118
	}
	/* + 2 : one for new probe, one for NULL func */
119
	new = allocate_probes(nr_probes + 2);
M
Mathieu Desnoyers 已提交
120 121
	if (new == NULL)
		return ERR_PTR(-ENOMEM);
122 123 124 125 126 127 128 129 130 131 132 133 134 135
	if (old) {
		if (pos < 0) {
			pos = nr_probes;
			memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
		} else {
			/* Copy higher priority probes ahead of the new probe */
			memcpy(new, old, pos * sizeof(struct tracepoint_func));
			/* Copy the rest after it. */
			memcpy(new + pos + 1, old + pos,
			       (nr_probes - pos) * sizeof(struct tracepoint_func));
		}
	} else
		pos = 0;
	new[pos] = *tp_func;
136
	new[nr_probes + 1].func = NULL;
137 138
	*funcs = new;
	debug_print_probes(*funcs);
M
Mathieu Desnoyers 已提交
139 140 141
	return old;
}

142 143
static void *func_remove(struct tracepoint_func **funcs,
		struct tracepoint_func *tp_func)
M
Mathieu Desnoyers 已提交
144 145
{
	int nr_probes = 0, nr_del = 0, i;
146
	struct tracepoint_func *old, *new;
M
Mathieu Desnoyers 已提交
147

148
	old = *funcs;
M
Mathieu Desnoyers 已提交
149

150
	if (!old)
151
		return ERR_PTR(-ENOENT);
152

153
	debug_print_probes(*funcs);
M
Mathieu Desnoyers 已提交
154
	/* (N -> M), (N > 1, M >= 0) probes */
155
	if (tp_func->func) {
156
		for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
157 158
			if (old[nr_probes].func == tp_func->func &&
			     old[nr_probes].data == tp_func->data)
159 160
				nr_del++;
		}
M
Mathieu Desnoyers 已提交
161 162
	}

163 164 165 166
	/*
	 * If probe is NULL, then nr_probes = nr_del = 0, and then the
	 * entire entry will be removed.
	 */
M
Mathieu Desnoyers 已提交
167 168
	if (nr_probes - nr_del == 0) {
		/* N -> 0, (N > 1) */
169 170
		*funcs = NULL;
		debug_print_probes(*funcs);
M
Mathieu Desnoyers 已提交
171 172 173 174 175
		return old;
	} else {
		int j = 0;
		/* N -> M, (N > 1, M > 0) */
		/* + 1 for NULL */
176
		new = allocate_probes(nr_probes - nr_del + 1);
M
Mathieu Desnoyers 已提交
177 178
		if (new == NULL)
			return ERR_PTR(-ENOMEM);
179
		for (i = 0; old[i].func; i++)
180 181
			if (old[i].func != tp_func->func
					|| old[i].data != tp_func->data)
M
Mathieu Desnoyers 已提交
182
				new[j++] = old[i];
183
		new[nr_probes - nr_del].func = NULL;
184
		*funcs = new;
M
Mathieu Desnoyers 已提交
185
	}
186
	debug_print_probes(*funcs);
M
Mathieu Desnoyers 已提交
187 188 189 190
	return old;
}

/*
191
 * Add the probe function to a tracepoint.
M
Mathieu Desnoyers 已提交
192
 */
193
static int tracepoint_add_func(struct tracepoint *tp,
194
			       struct tracepoint_func *func, int prio)
M
Mathieu Desnoyers 已提交
195
{
196
	struct tracepoint_func *old, *tp_funcs;
M
Mathieu Desnoyers 已提交
197

198 199
	if (tp->regfunc && !static_key_enabled(&tp->key))
		tp->regfunc();
M
Mathieu Desnoyers 已提交
200

201 202
	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
203
	old = func_add(&tp_funcs, func, prio);
204 205 206 207
	if (IS_ERR(old)) {
		WARN_ON_ONCE(1);
		return PTR_ERR(old);
	}
208

M
Mathieu Desnoyers 已提交
209 210 211 212 213 214 215
	/*
	 * rcu_assign_pointer has a smp_wmb() which makes sure that the new
	 * probe callbacks array is consistent before setting a pointer to it.
	 * This array is referenced by __DO_TRACE from
	 * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
	 * is used.
	 */
216 217 218
	rcu_assign_pointer(tp->funcs, tp_funcs);
	if (!static_key_enabled(&tp->key))
		static_key_slow_inc(&tp->key);
219
	release_probes(old);
220
	return 0;
M
Mathieu Desnoyers 已提交
221 222 223
}

/*
224
 * Remove a probe function from a tracepoint.
M
Mathieu Desnoyers 已提交
225 226 227 228
 * Note: only waiting an RCU period after setting elem->call to the empty
 * function insures that the original callback is not used anymore. This insured
 * by preempt_disable around the call site.
 */
229 230
static int tracepoint_remove_func(struct tracepoint *tp,
		struct tracepoint_func *func)
M
Mathieu Desnoyers 已提交
231
{
232
	struct tracepoint_func *old, *tp_funcs;
M
Mathieu Desnoyers 已提交
233

234 235
	tp_funcs = rcu_dereference_protected(tp->funcs,
			lockdep_is_held(&tracepoints_mutex));
236 237 238 239
	old = func_remove(&tp_funcs, func);
	if (IS_ERR(old)) {
		WARN_ON_ONCE(1);
		return PTR_ERR(old);
M
Mathieu Desnoyers 已提交
240
	}
241

242 243 244 245
	if (!tp_funcs) {
		/* Removed last function */
		if (tp->unregfunc && static_key_enabled(&tp->key))
			tp->unregfunc();
246

247 248
		if (static_key_enabled(&tp->key))
			static_key_slow_dec(&tp->key);
249
	}
250
	rcu_assign_pointer(tp->funcs, tp_funcs);
251
	release_probes(old);
252
	return 0;
253 254
}

M
Mathieu Desnoyers 已提交
255 256
/**
 * tracepoint_probe_register -  Connect a probe to a tracepoint
257
 * @tp: tracepoint
M
Mathieu Desnoyers 已提交
258
 * @probe: probe handler
259
 * @data: tracepoint data
260
 * @prio: priority of this function over other registered functions
M
Mathieu Desnoyers 已提交
261
 *
262 263 264 265 266
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
M
Mathieu Desnoyers 已提交
267
 */
268 269
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
				   void *data, int prio)
M
Mathieu Desnoyers 已提交
270
{
271 272
	struct tracepoint_func tp_func;
	int ret;
M
Mathieu Desnoyers 已提交
273 274

	mutex_lock(&tracepoints_mutex);
275 276
	tp_func.func = probe;
	tp_func.data = data;
277 278
	tp_func.prio = prio;
	ret = tracepoint_add_func(tp, &tp_func, prio);
279
	mutex_unlock(&tracepoints_mutex);
280
	return ret;
M
Mathieu Desnoyers 已提交
281
}
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);

/**
 * tracepoint_probe_register -  Connect a probe to a tracepoint
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 *
 * Registers the probe with TRACEPOINT_DEFAULT_PRIO.
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
	return tracepoint_probe_register_prio(tp, probe, data,
					      TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

/**
 * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
305
 * @tp: tracepoint
M
Mathieu Desnoyers 已提交
306
 * @probe: probe function pointer
307
 * @data: tracepoint data
M
Mathieu Desnoyers 已提交
308
 *
309
 * Returns 0 if ok, error value on error.
M
Mathieu Desnoyers 已提交
310
 */
311
int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
M
Mathieu Desnoyers 已提交
312
{
313 314
	struct tracepoint_func tp_func;
	int ret;
M
Mathieu Desnoyers 已提交
315 316

	mutex_lock(&tracepoints_mutex);
317 318 319
	tp_func.func = probe;
	tp_func.data = data;
	ret = tracepoint_remove_func(tp, &tp_func);
320
	mutex_unlock(&tracepoints_mutex);
321
	return ret;
M
Mathieu Desnoyers 已提交
322 323 324
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);

325
#ifdef CONFIG_MODULES
326 327
bool trace_module_has_bad_taint(struct module *mod)
{
328 329
	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
			       (1 << TAINT_UNSIGNED_MODULE));
330 331
}

332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
/*
 * Notifier chain called with MODULE_STATE_COMING / MODULE_STATE_GOING and a
 * struct tp_module pointer when a module is added to or removed from
 * tracepoint_module_list.
 */
static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);

/**
 * register_tracepoint_module_notifier - register tracepoint coming/going notifier
 * @nb: notifier block
 *
 * Notifiers registered with this function are called on module
 * coming/going with the tracepoint_module_list_mutex held.
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 *
 * On successful registration the callback is immediately invoked with
 * MODULE_STATE_COMING for every module already on the local list.
 * Returns the result of blocking_notifier_chain_register().
 */
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *entry;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb);
	if (!ret) {
		/* Replay "coming" for the modules that are already listed. */
		list_for_each_entry(entry, &tracepoint_module_list, list)
			(void) nb->notifier_call(nb, MODULE_STATE_COMING, entry);
	}
	mutex_unlock(&tracepoint_module_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);

/**
 * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
 * @nb: notifier block
 *
 * The notifier block callback should expect a "struct tp_module" data
 * pointer.
 *
 * On successful unregistration the callback is invoked one last time with
 * MODULE_STATE_GOING for every module still on the local list.
 * Returns the result of blocking_notifier_chain_unregister().
 */
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
	struct tp_module *entry;
	int ret;

	mutex_lock(&tracepoint_module_list_mutex);
	ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb);
	if (!ret) {
		/* Replay "going" for the modules that are still listed. */
		list_for_each_entry(entry, &tracepoint_module_list, list)
			(void) nb->notifier_call(nb, MODULE_STATE_GOING, entry);
	}
	mutex_unlock(&tracepoint_module_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);

/*
 * Ensure the tracer unregistered the module's probes before the module
 * teardown is performed. Prevents leaks of probe and data pointers.
 *
 * Warns (once) if any tracepoint in [@begin, @end) still has probes
 * attached. A NULL @begin is ignored.
 */
static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
		struct tracepoint * const *end)
{
	struct tracepoint * const *cursor = begin;

	if (!begin)
		return;
	while (cursor < end) {
		WARN_ON_ONCE((*cursor)->funcs);
		cursor++;
	}
}

400 401
static int tracepoint_module_coming(struct module *mod)
{
402
	struct tp_module *tp_mod;
403 404
	int ret = 0;

405 406 407
	if (!mod->num_tracepoints)
		return 0;

408
	/*
409 410
	 * We skip modules that taint the kernel, especially those with different
	 * module headers (for forced load), to make sure we don't cause a crash.
411
	 * Staging, out-of-tree, and unsigned GPL modules are fine.
412
	 */
413
	if (trace_module_has_bad_taint(mod))
414
		return 0;
415
	mutex_lock(&tracepoint_module_list_mutex);
416 417 418 419 420
	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
	if (!tp_mod) {
		ret = -ENOMEM;
		goto end;
	}
421
	tp_mod->mod = mod;
422
	list_add_tail(&tp_mod->list, &tracepoint_module_list);
423 424
	blocking_notifier_call_chain(&tracepoint_notify_list,
			MODULE_STATE_COMING, tp_mod);
425
end:
426
	mutex_unlock(&tracepoint_module_list_mutex);
427 428 429
	return ret;
}

430
static void tracepoint_module_going(struct module *mod)
431
{
432
	struct tp_module *tp_mod;
433

434
	if (!mod->num_tracepoints)
435
		return;
436

437 438
	mutex_lock(&tracepoint_module_list_mutex);
	list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
439
		if (tp_mod->mod == mod) {
440 441 442 443 444 445 446 447 448 449
			blocking_notifier_call_chain(&tracepoint_notify_list,
					MODULE_STATE_GOING, tp_mod);
			list_del(&tp_mod->list);
			kfree(tp_mod);
			/*
			 * Called the going notifier before checking for
			 * quiescence.
			 */
			tp_module_going_check_quiescent(mod->tracepoints_ptrs,
				mod->tracepoints_ptrs + mod->num_tracepoints);
450 451 452 453 454 455 456 457 458
			break;
		}
	}
	/*
	 * In the case of modules that were tainted at "coming", we'll simply
	 * walk through the list without finding it. We cannot use the "tainted"
	 * flag on "going", in case a module taints the kernel only after being
	 * loaded.
	 */
459
	mutex_unlock(&tracepoint_module_list_mutex);
460
}
461

462 463
static int tracepoint_module_notify(struct notifier_block *self,
		unsigned long val, void *data)
464 465
{
	struct module *mod = data;
466
	int ret = 0;
467 468 469

	switch (val) {
	case MODULE_STATE_COMING:
470 471 472 473
		ret = tracepoint_module_coming(mod);
		break;
	case MODULE_STATE_LIVE:
		break;
474
	case MODULE_STATE_GOING:
475 476 477
		tracepoint_module_going(mod);
		break;
	case MODULE_STATE_UNFORMED:
478 479
		break;
	}
480
	return ret;
481 482
}

483
static struct notifier_block tracepoint_module_nb = {
484 485 486 487
	.notifier_call = tracepoint_module_notify,
	.priority = 0,
};

488
static __init int init_tracepoints(void)
489
{
490 491 492
	int ret;

	ret = register_module_notifier(&tracepoint_module_nb);
493
	if (ret)
494
		pr_warn("Failed to register tracepoint module enter notifier\n");
495

496
	return ret;
497 498
}
__initcall(init_tracepoints);
499
#endif /* CONFIG_MODULES */
J
Jason Baron 已提交
500

501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
/*
 * Call @fct with @priv for every tracepoint pointer in [@begin, @end).
 * A NULL @begin is ignored.
 */
static void for_each_tracepoint_range(struct tracepoint * const *begin,
		struct tracepoint * const *end,
		void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	struct tracepoint * const *cursor = begin;

	if (!begin)
		return;
	while (cursor < end) {
		fct(*cursor, priv);
		cursor++;
	}
}

/**
 * for_each_kernel_tracepoint - iteration on all kernel tracepoints
 * @fct: callback
 * @priv: private data
 *
 * Walks the tracepoint pointers between __start___tracepoints_ptrs and
 * __stop___tracepoints_ptrs and calls @fct on each of them.
 */
void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
		void *priv)
{
	struct tracepoint * const *first = __start___tracepoints_ptrs;
	struct tracepoint * const *last = __stop___tracepoints_ptrs;

	for_each_tracepoint_range(first, last, fct, priv);
}
EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint);

527
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
528

529
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
J
Jason Baron 已提交
530 531 532 533
static int sys_tracepoint_refcount;

void syscall_regfunc(void)
{
534
	struct task_struct *p, *t;
J
Jason Baron 已提交
535 536

	if (!sys_tracepoint_refcount) {
537 538
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
539
			set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
540 541
		}
		read_unlock(&tasklist_lock);
J
Jason Baron 已提交
542 543 544 545 546 547
	}
	sys_tracepoint_refcount++;
}

void syscall_unregfunc(void)
{
548
	struct task_struct *p, *t;
J
Jason Baron 已提交
549 550 551

	sys_tracepoint_refcount--;
	if (!sys_tracepoint_refcount) {
552 553
		read_lock(&tasklist_lock);
		for_each_process_thread(p, t) {
554
			clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
555 556
		}
		read_unlock(&tasklist_lock);
J
Jason Baron 已提交
557 558
	}
}
559
#endif