/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096

/**
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
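
/*
 * For example, SIZEOF_TRACE_KPROBE(2) is the size of a trace_kprobe whose
 * trailing tp.args[] flexible array holds two probe_arg slots.
 */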


static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
						 struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}
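
/*
 * E.g. a probe on "btrfs:btrfs_sync_file" (a hypothetical symbol spec) is
 * within the module "btrfs": the "MOD:" prefix matches the module name.
 */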

static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
	return !!strchr(trace_kprobe_symbol(tk), ':');
}

static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}

static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;
	update_symbol_cache(sc);

	return sc;
}

/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
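
/*
 * Note: the u32 at *dest is a "data relative location" packing the string
 * length in its upper 16 bits and the offset of the string data (relative
 * to the entry) in its lower 16 bits; see make_data_rloc() in trace_probe.h.
 */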

/* Return the length of the string -- including the null terminator byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));

#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL

/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};
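
/*
 * With the table above, an argument spec such as "+0(%di):string" fetches a
 * null-terminated string, while ":x32" fetches a u32 printed in hexadecimal
 * (the x* entries are hex-printing aliases of the u* types). The "%di"
 * register name is only illustrative (x86-64).
 */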

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int maxactive,
					     int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	tk->rp.maxactive = maxactive;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}

static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}

/*
 * Enable trace_probe.
 * If file is NULL, enable the "perf" handler; otherwise enable the
 * "trace" handler for the given trace_event_file.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
	} else
		tk->tp.flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}
 out:
	return ret;
}

/*
 * Disable trace_probe.
 * If file is NULL, disable the "perf" handler; otherwise disable the
 * "trace" handler for the given trace_event_file.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}

	/*
	 * if tk is not added to any list, it must be a local trace_kprobe
	 * created with perf_event_open. We don't need to wait for these
	 * trace_kprobes
	 */
	if (list_empty(&tk->list))
		wait = 0;
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure disabled (all running handlers are finished).
		 * This is not only for kfree(), but also the caller,
		 * trace_remove_event_call() supposes it for releasing
		 * event_call related objects, which will be accessed in
		 * the kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}

/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear disabled flag according to tp->flag */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0)
		tk->tp.flags |= TP_FLAG_REGISTERED;
	else {
		pr_warn("Could not insert probe at %s+%lu: %d\n",
			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
				tk->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* Enabled event can not be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete an old (same name) event if one exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
			tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier call back, checking event on the module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};
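
/*
 * E.g. a probe on "btrfs:btrfs_sync_file" (hypothetical) registered before
 * btrfs is loaded fails with -ENOENT, stays on probe_list, and is retried
 * by the callback above once the module comes in (see
 * __register_trace_kprobe()).
 */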

/* Convert certain expected characters (':', '.') into '_' when generating event names */
static inline void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}
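
/*
 * E.g. an auto-generated event name "p_btrfs:btrfs_sync_file_0" becomes
 * "p_btrfs_btrfs_sync_file_0" (the symbol here is hypothetical).
 */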

static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe:
	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe:
	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm       : fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
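	/*
	 * For example (writing to the tracefs kprobe_events file, as in
	 * Documentation/trace/kprobetrace.txt; register names illustrative,
	 * x86):
	 *   p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
	 *   r:myretprobe do_sys_open $retval
	 */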
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	int maxactive = 0;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	event = strchr(&argv[0][1], ':');
	if (event) {
		event[0] = '\0';
		event++;
	}
	if (is_return && isdigit(argv[0][1])) {
		ret = kstrtouint(&argv[0][1], 0, &maxactive);
		if (ret) {
			pr_info("Failed to parse maxactive.\n");
			return ret;
		}
		/* kretprobe instances are iterated over via a list. The
		 * maximum should stay reasonable.
		 */
		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
			pr_info("Maxactive is too big (%d > %d).\n",
				maxactive, KRETPROBE_MAXACTIVE_MAX);
			return -E2BIG;
		}
	}
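	/* E.g. "r100:myretprobe" above sets maxactive to 100. */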

	if (event) {
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}

	/* Try to parse an address; if that fails, treat the input
	 * as a symbol. */
	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse either an address or a symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !kprobe_on_func_entry(NULL, symbol, offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		sanitize_event_name(buf);
		event = buf;
	}
	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
			       argc, is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
							tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						is_return, true,
						kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}

static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
			trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}
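
/*
 * A line produced above reads like, e.g. (values hypothetical):
 *   p:kprobes/myprobe do_sys_open dfd=%ax
 * i.e. probe type, group/event, probed location, then "name=spec" args.
 */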

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}
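
/*
 * Example kprobe_profile line (event name, hit count, missed count; values
 * hypothetical):
 *   myprobe                                            5               0
 */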

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}
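
/*
 * The reserved ring buffer record above is laid out as
 *   [kprobe_trace_entry_head][fixed-size argument values][dynamic data]
 * where tk->tp.size covers the fixed part and dsize the dynamic part
 * (e.g. string bodies referenced by __data_loc fields).
 */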

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}


static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static void
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kprobe_perf_func);

/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kprobe_perf_func(tk, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static inline void init_trace_event_call(struct trace_kprobe *tk,
					 struct trace_event_call *call)
{
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}

	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
}

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret = 0;

	init_trace_event_call(tk, call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);
	return ret;
}

#ifdef CONFIG_PERF_EVENTS
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return)
{
	struct trace_kprobe *tk;
	int ret;
	char *event;

	/*
	 * local trace_kprobes are not added to probe_list, so they are never
	 * searched in find_trace_kprobe(). Therefore, there is no concern of
	 * duplicated name here.
	 */
	event = func ? func : "DUMMY_EVENT";

	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
				offs, 0 /* maxactive */, 0 /* nargs */,
				is_return);

	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return ERR_CAST(tk);
	}

	init_trace_event_call(tk, &tk->tp.call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		goto error;

	return &tk->tp.call;
error:
	free_trace_kprobe(tk);
	return ERR_PTR(ret);
}

void destroy_local_trace_kprobe(struct trace_event_call *event_call)
{
	struct trace_kprobe *tk;

	tk = container_of(event_call, struct trace_kprobe, tp.call);

	if (trace_probe_is_enabled(&tk->tp)) {
		WARN_ON(1);
		return;
	}

	__unregister_trace_kprobe(tk);
	free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table. 'noinline' makes sure that there
 * isn't an inlined version used by the test method below
 */
static __used __init noinline int
kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
	struct trace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tk->tp.call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, so we can do this locklessly.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
				"$stack $stack0 +0($stack)",
				create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
				"$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing it */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = trace_run_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = trace_run_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	/*
	 * Wait for the optimizer work to finish. Otherwise it might fiddle
	 * with probes in already freed __init text.
	 */
	wait_for_kprobe_optimizer();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif