/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"

/**
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
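
/*
 * tp.args is a flexible array at the end of struct trace_probe, so a
 * single allocation of SIZEOF_TRACE_KPROBE(n) bytes holds the
 * trace_kprobe together with its n probe_arg slots.
 */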
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
						 struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
	return !!strchr(trace_kprobe_symbol(tk), ':');
}

static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}

static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};
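
/*
 * The cache stores the resolved address of symbol+offset so that fetch
 * handlers need not run a kallsyms lookup on every probe hit; callers
 * such as traceprobe_update_arg() can refresh it via
 * update_symbol_cache(), e.g. when probes are re-registered.
 */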

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;
	update_symbol_cache(sc);

	return sc;
}

/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
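/*
 * With the make_data_rloc()/get_rloc_*() helpers used below (from
 * trace_probe.h), that u32 packs the fetched string length in its upper
 * 16 bits and the data offset relative to the entry in its lower 16 bits.
 */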
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));

/* Return the length of the string -- including the terminating null byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));

#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL

/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}

static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}

/*
 * Enable trace_probe.
 * If the file is NULL, enable the "perf" handler; otherwise enable
 * the "trace" handler.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
	} else
		tk->tp.flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}
 out:
	return ret;
}

/*
 * Disable trace_probe.
 * If the file is NULL, disable the "perf" handler; otherwise disable
 * the "trace" handler.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure the probe is really disabled (all running
		 * handlers have finished). This matters not only for the
		 * kfree() below but also for the caller:
		 * trace_remove_event_call() relies on it before releasing
		 * event_call related objects, which are accessed from
		 * kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}

/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear disabled flag according to tp->flags */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0)
		tk->tp.flags |= TP_FLAG_REGISTERED;
	else {
		pr_warn("Could not insert probe at %s+%lu: %d\n",
			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
				tk->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: call with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* Enabled event can not be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete old (same name) event if it exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
			tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier callback, checking for probe events on the module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm       : fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
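	/*
	 * For example (cf. Documentation/trace/kprobetrace.txt), writing
	 *   echo 'p:myprobe do_sys_open dfd=%ax filename=%dx' \
	 *        > /sys/kernel/debug/tracing/kprobe_events
	 * reaches this function as argv = { "p:myprobe", "do_sys_open",
	 * "dfd=%ax", "filename=%dx" } and argc = 4.
	 */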
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must be started with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		/* an address specified */
		ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !arch_function_offset_within_entry(offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
							tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						is_return, true,
						kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}

static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
			trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
			create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};
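
/*
 * Each kprobe_profile line thus reports the event name, the hit count
 * accumulated in tk->nhit (summed over CPUs by trace_kprobe_nhit()),
 * and the miss count of the underlying kprobe (rp.kp.nmissed).
 */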

/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}


static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static void
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct bpf_prog *prog = call->prog;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (prog && !trace_call_bpf(prog, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
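	/*
	 * A perf raw sample is a u32 size followed by the payload, and the
	 * whole record must stay u64-aligned; rounding up
	 * (__size + sizeof(u32)) and then subtracting sizeof(u32) picks the
	 * smallest buffer size that keeps that alignment.
	 */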
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kprobe_perf_func);

/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct bpf_prog *prog = call->prog;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (prog && !trace_call_bpf(prog, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_kprobe/disable_trace_kprobe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tk, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret;

	/* Initialize trace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);
	return ret;
}

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table. 'noinline' makes sure that there
 * isn't an inlined version used by the test method below
 */
static __used __init noinline int
kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
	struct trace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tk->tp.call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				  "$stack $stack0 +0($stack)",
				  create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				  "$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing them */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = traceprobe_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	if (warn)
		pr_cont("NG: Some tests are failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif