/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
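/* Cap for a user-supplied MAXACTIVE in the "r<MAXACTIVE>" event syntax */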
#define KRETPROBE_MAXACTIVE_MAX 4096

/**
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

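/* Size of a trace_kprobe whose tp.args[] flexible array holds n entries */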
#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))

DEFINE_PER_CPU(int, bpf_kprobe_override);

static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

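/*
 * A probe on a module symbol is spelled "MOD:SYM" (the [MOD:]KSYM form in
 * the create_trace_kprobe() syntax below), so e.g. a probe whose symbol is
 * "btrfs:btrfs_sync_file" is within the module "btrfs".
 */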
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
						 struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
	return !!strchr(trace_kprobe_symbol(tk), ':');
}

static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}

int trace_kprobe_ftrace(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
	return kprobe_ftrace(&tk->rp.kp);
}

static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;
	update_symbol_cache(sc);

	return sc;
}

/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,		\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));

/* Return the length of the string, including the terminating null byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));

#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL

/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};
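/*
 * A fetch argument picks one of these via the FETCHARG:TYPE suffix parsed
 * in create_trace_kprobe() below; e.g. "%ax:s32" (on x86) fetches the
 * register as a signed 32-bit value instead of the default unsigned long.
 */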

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int maxactive,
					     int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	tk->rp.maxactive = maxactive;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}

static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}

/*
 * Enable trace_probe. If the file is NULL, enable the "perf" handler;
 * otherwise enable the "trace" handler.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	int ret = 0;

	if (file) {
		struct event_file_link *link;

		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
	} else
		tk->tp.flags |= TP_FLAG_PROFILE;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}
 out:
	return ret;
}

/*
 * Disable trace_probe. If the file is NULL, disable the "perf" handler;
 * otherwise disable the "trace" handler.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure disabled (all running handlers are finished).
		 * This is not only for kfree(), but also the caller,
		 * trace_remove_event_call() supposes it for releasing
		 * event_call related objects, which will be accessed in
		 * the kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}

/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear disabled flag according to tp->flag */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0)
		tk->tp.flags |= TP_FLAG_REGISTERED;
	else {
		pr_warn("Could not insert probe at %s+%lu: %d\n",
			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);
		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
				tk->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: call with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* Enabled event cannot be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete the old (same name) event if it exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
			tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier callback, checking events on the module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

/* Convert certain expected symbols into '_' when generating event names */
static inline void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}

static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe:
	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe:
	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm       : fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
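	/*
	 * Example lines as written to the tracefs "kprobe_events" file
	 * (illustrative only; the x86 register names follow
	 * Documentation/trace/kprobetrace.txt):
	 *   p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
	 *   r:myretprobe do_sys_open $retval
	 */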
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	int maxactive = 0;
	char *arg;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	event = strchr(&argv[0][1], ':');
	if (event) {
		event[0] = '\0';
		event++;
	}
	if (is_return && isdigit(argv[0][1])) {
		ret = kstrtouint(&argv[0][1], 0, &maxactive);
		if (ret) {
			pr_info("Failed to parse maxactive.\n");
			return ret;
		}
		/* kretprobe instances are iterated over via a list. The
		 * maximum should stay reasonable.
		 */
		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
			pr_info("Maxactive is too big (%d > %d).\n",
				maxactive, KRETPROBE_MAXACTIVE_MAX);
			return -E2BIG;
		}
	}

	if (event) {
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}

	/*
	 * Try to parse an address. If that fails, try to read the
	 * input as a symbol.
	 */
	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse either an address or a symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !kprobe_on_func_entry(NULL, symbol, offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		sanitize_event_name(buf);
		event = buf;
	}
	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
			       argc, is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
							tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						is_return, true,
						kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}

static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
			trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

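/*
 * Opening "kprobe_events" with O_TRUNC ("echo ... >" rather than ">>")
 * first removes every existing probe; this fails with -EBUSY while any
 * probe is still enabled.
 */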
static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return traceprobe_probes_write(file, buffer, count, ppos,
			create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}


static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call)) {
		int ret;

		ret = trace_call_bpf(call, regs);

		/*
		 * We need to check and see if we modified the pc of the
		 * pt_regs, and if so clear the kprobe and return 1 so that we
		 * don't do the instruction skipping.  Also reset our state so
		 * we are clean the next pass through.
		 */
		if (__this_cpu_read(bpf_kprobe_override)) {
			__this_cpu_write(bpf_kprobe_override, 0);
			reset_current_kprobe();
			return 1;
		}
		if (!ret)
			return 0;
	}

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return 0;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return 0;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL, NULL);
	return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);

/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
	int ret = 0;

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		ret = kprobe_perf_func(tk, regs);
#endif
	return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret;

	/* Initialize trace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);
	return ret;
}

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table. 'noinline' makes sure that there
 * isn't an inlined version used by the test method below
 */
static __used __init noinline int
kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static __init struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
	struct trace_event_file *file;

	list_for_each_entry(file, &tr->events, list)
		if (file->event_call == &tk->tp.call)
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, so we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
				  "$stack $stack0 +0($stack)",
				  create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
				  "$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing them */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = traceprobe_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = traceprobe_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	/*
	 * Wait for the optimizer work to finish. Otherwise it might fiddle
	 * with probes in already freed __init text.
	 */
	wait_for_kprobe_optimizer();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif