/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#define pr_fmt(fmt)	"trace_kprobe: " fmt

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
#include <linux/error-injection.h>

#include "trace_probe.h"

#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096

/**
 * Kprobe event core functions
 */
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};

#define SIZEOF_TRACE_KPROBE(n)				\
	(offsetof(struct trace_kprobe, tp.args) +	\
	(sizeof(struct probe_arg) * (n)))
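
/*
 * Note: tp.args is a flexible array at the end of struct trace_probe, so
 * e.g. SIZEOF_TRACE_KPROBE(2) is the fixed header plus room for two
 * struct probe_arg slots.
 */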

static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
	return tk->rp.handler != NULL;
}

static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
{
	return tk->symbol ? tk->symbol : "unknown";
}

static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
{
	return tk->rp.kp.offset;
}

static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
{
	return !!(kprobe_gone(&tk->rp.kp));
}

static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
						 struct module *mod)
{
	int len = strlen(mod->name);
	const char *name = trace_kprobe_symbol(tk);
	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
}

static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
{
	return !!strchr(trace_kprobe_symbol(tk), ':');
}
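/*
 * A module function is specified as "MOD:SYM" (e.g. "btrfs:btrfs_sync_file");
 * the ':' separator is what the two helpers above key off.
 */
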
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	return nhit;
}

static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
{
	unsigned long addr;

	if (tk->symbol) {
		addr = (unsigned long)
			kallsyms_lookup_name(trace_kprobe_symbol(tk));
		addr += tk->rp.kp.offset;
	} else {
		addr = (unsigned long)tk->rp.kp.addr;
	}
	return addr;
}

bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

	return kprobe_on_func_entry(tk->rp.kp.addr,
			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
			tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
}

bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

	return within_error_injection_list(trace_kprobe_address(tk));
}

static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Memory fetching by symbol */
struct symbol_cache {
	char		*symbol;
	long		offset;
	unsigned long	addr;
};

unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);

	if (sc->addr)
		sc->addr += sc->offset;

	return sc->addr;
}

void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;

	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;
	update_symbol_cache(sc);

	return sc;
}
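
/*
 * The cached address is refreshed via update_symbol_cache() when a probe is
 * (re)registered (see traceprobe_update_arg() in trace_probe.c), so @SYM
 * fetch args can follow symbols that move, e.g. across a module reload.
 */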

/*
 * Kprobes-specific fetch functions
 */
#define DEFINE_FETCH_stack(type)					\
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,		\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));

DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string	NULL
#define fetch_stack_string_size	NULL

#define DEFINE_FETCH_memory(type)					\
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,	\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));

DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
					    void *addr, void *dest)
{
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	long ret;

	if (!maxlen)
		return;

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_unsafe(dst, addr, maxlen);

	if (ret < 0) {	/* Failed to fetch string */
		dst[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else {
		*(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
	}
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
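
/*
 * The u32 "data location" word used above packs the fetched string length
 * into its high 16 bits and the buffer-relative offset into its low 16 bits;
 * see make_data_rloc()/get_rloc_len()/get_rloc_offs() in trace_probe.h.
 */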

/* Return the length of the string -- including the null terminator byte */
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
						 void *addr, void *dest)
{
	mm_segment_t old_fs;
	int ret, len = 0;
	u8 c;

	old_fs = get_fs();
	set_fs(KERNEL_DS);
	pagefault_disable();

	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));

#define DEFINE_FETCH_symbol(type)					\
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}									\
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));

DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* kprobes don't support file_offset fetch methods */
#define fetch_file_offset_u8		NULL
#define fetch_file_offset_u16		NULL
#define fetch_file_offset_u32		NULL
#define fetch_file_offset_u64		NULL
#define fetch_file_offset_string	NULL
#define fetch_file_offset_string_size	NULL

/* Fetch type information table */
static const struct fetch_type kprobes_fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),

	ASSIGN_FETCH_TYPE_END
};
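
/*
 * These entries back the FETCHARG:TYPE syntax parsed in create_trace_kprobe()
 * below; e.g. ":x32" fetches like a u32 but is printed in hexadecimal.
 */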

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_kprobe *alloc_trace_kprobe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int maxactive,
					     int nargs, bool is_return)
{
	struct trace_kprobe *tk;
	int ret = -ENOMEM;

	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
			goto error;
		tk->rp.kp.symbol_name = tk->symbol;
		tk->rp.kp.offset = offs;
	} else
		tk->rp.kp.addr = addr;

	if (is_return)
		tk->rp.handler = kretprobe_dispatcher;
	else
		tk->rp.kp.pre_handler = kprobe_dispatcher;

	tk->rp.maxactive = maxactive;

	if (!event || !is_good_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.call.class = &tk->tp.class;
	tk->tp.call.name = kstrdup(event, GFP_KERNEL);
	if (!tk->tp.call.name)
		goto error;

	if (!group || !is_good_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tk->tp.class.system = kstrdup(group, GFP_KERNEL);
	if (!tk->tp.class.system)
		goto error;

	INIT_LIST_HEAD(&tk->list);
	INIT_LIST_HEAD(&tk->tp.files);
	return tk;
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}

static void free_trace_kprobe(struct trace_kprobe *tk)
{
	int i;

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_free_probe_arg(&tk->tp.args[i]);

	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

static struct trace_kprobe *find_trace_kprobe(const char *event,
					      const char *group)
{
	struct trace_kprobe *tk;

	list_for_each_entry(tk, &probe_list, list)
		if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
		    strcmp(tk->tp.call.class->system, group) == 0)
			return tk;
	return NULL;
}

static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
{
	int ret = 0;

	if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
		if (trace_kprobe_is_return(tk))
			ret = enable_kretprobe(&tk->rp);
		else
			ret = enable_kprobe(&tk->rp.kp);
	}

	return ret;
}

/*
 * Enable trace_probe
 * If @file is NULL, enable the "perf" handler; otherwise enable the
 * "trace" handler for that file.
 */
static int
enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link;
	int ret = 0;

	if (file) {
		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			ret = -ENOMEM;
			goto out;
		}

		link->file = file;
		list_add_tail_rcu(&link->list, &tk->tp.files);

		tk->tp.flags |= TP_FLAG_TRACE;
		ret = __enable_trace_kprobe(tk);
		if (ret) {
			list_del_rcu(&link->list);
			kfree(link);
			tk->tp.flags &= ~TP_FLAG_TRACE;
		}

	} else {
		tk->tp.flags |= TP_FLAG_PROFILE;
		ret = __enable_trace_kprobe(tk);
		if (ret)
			tk->tp.flags &= ~TP_FLAG_PROFILE;
	}
 out:
	return ret;
}

/*
 * Disable trace_probe
 * If @file is NULL, disable the "perf" handler; otherwise disable the
 * "trace" handler for that file.
 */
static int
disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
	struct event_file_link *link = NULL;
	int wait = 0;
	int ret = 0;

	if (file) {
		link = find_event_file_link(&tk->tp, file);
		if (!link) {
			ret = -EINVAL;
			goto out;
		}

		list_del_rcu(&link->list);
		wait = 1;
		if (!list_empty(&tk->tp.files))
			goto out;

		tk->tp.flags &= ~TP_FLAG_TRACE;
	} else
		tk->tp.flags &= ~TP_FLAG_PROFILE;

	if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			disable_kretprobe(&tk->rp);
		else
			disable_kprobe(&tk->rp.kp);
		wait = 1;
	}

	/*
	 * If tk is not added to any list, it must be a local trace_kprobe
	 * created with perf_event_open. We don't need to wait for these
	 * trace_kprobes.
	 */
	if (list_empty(&tk->list))
		wait = 0;
 out:
	if (wait) {
		/*
		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
		 * to ensure disabled (all running handlers are finished).
		 * This is not only for kfree(), but also the caller,
		 * trace_remove_event_call() supposes it for releasing
		 * event_call related objects, which will be accessed in
		 * the kprobe_trace_func/kretprobe_trace_func.
		 */
		synchronize_sched();
		kfree(link);	/* Ignored if link == NULL */
	}

	return ret;
}

#if defined(CONFIG_KPROBES_ON_FTRACE) && \
	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
static bool within_notrace_func(struct trace_kprobe *tk)
{
	unsigned long offset, size, addr;

	addr = trace_kprobe_address(tk);
	if (!kallsyms_lookup_size_offset(addr, &size, &offset))
		return true;	/* Out of range. */

	return !ftrace_location_range(addr - offset, addr - offset + size);
}
#else
#define within_notrace_func(tk)	(false)
#endif
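
/*
 * Note: with KPROBES_ON_FTRACE and without KPROBE_EVENTS_ON_NOTRACE, a
 * symbol whose body contains no ftrace call site (a "notrace" function)
 * is rejected by __register_trace_kprobe() below.
 */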

/* Internal register function - just handle k*probes and flags */
static int __register_trace_kprobe(struct trace_kprobe *tk)
{
	int i, ret;

	if (trace_probe_is_registered(&tk->tp))
		return -EINVAL;

	if (within_notrace_func(tk)) {
		pr_warn("Could not probe notrace function %s\n",
			trace_kprobe_symbol(tk));
		return -EINVAL;
	}

	for (i = 0; i < tk->tp.nr_args; i++)
		traceprobe_update_arg(&tk->tp.args[i]);

	/* Set/clear disabled flag according to tp->flag */
	if (trace_probe_is_enabled(&tk->tp))
		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
	else
		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;

	if (trace_kprobe_is_return(tk))
		ret = register_kretprobe(&tk->rp);
	else
		ret = register_kprobe(&tk->rp.kp);

	if (ret == 0)
		tk->tp.flags |= TP_FLAG_REGISTERED;
	else {
		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {
			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
			ret = 0;
		} else if (ret == -EILSEQ) {
			pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
				tk->rp.kp.addr);
			ret = -EINVAL;
		}
	}

	return ret;
}

/* Internal unregister function - just handle k*probes and flags */

/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
	if (trace_probe_is_registered(&tk->tp)) {
		if (trace_kprobe_is_return(tk))
			unregister_kretprobe(&tk->rp);
		else
			unregister_kprobe(&tk->rp.kp);
		tk->tp.flags &= ~TP_FLAG_REGISTERED;
		/* Cleanup kprobe for reuse */
		if (tk->rp.kp.symbol_name)
			tk->rp.kp.addr = NULL;
	}
}

/* Unregister a trace_probe and probe_event: call with probe_lock held */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
	/* Enabled event can not be unregistered */
	if (trace_probe_is_enabled(&tk->tp))
		return -EBUSY;

	/* Will fail if probe is being used by ftrace or perf */
	if (unregister_kprobe_event(tk))
		return -EBUSY;

	__unregister_trace_kprobe(tk);
	list_del(&tk->list);

	return 0;
}

/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
	struct trace_kprobe *old_tk;
	int ret;

	mutex_lock(&probe_lock);

	/* Delete old (same name) event if it exists */
	old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
			tk->tp.call.class->system);
	if (old_tk) {
		ret = unregister_trace_kprobe(old_tk);
		if (ret < 0)
			goto end;
		free_trace_kprobe(old_tk);
	}

	/* Register new event */
	ret = register_kprobe_event(tk);
	if (ret) {
		pr_warn("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	/* Register k*probe */
	ret = __register_trace_kprobe(tk);
	if (ret < 0)
		unregister_kprobe_event(tk);
	else
		list_add_tail(&tk->list, &probe_list);

end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Module notifier callback, checking events on the coming module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
				       unsigned long val, void *data)
{
	struct module *mod = data;
	struct trace_kprobe *tk;
	int ret;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* Update probes on coming module */
	mutex_lock(&probe_lock);
	list_for_each_entry(tk, &probe_list, list) {
		if (trace_kprobe_within_module(tk, mod)) {
			/* Don't need to check busy - this should have gone. */
			__unregister_trace_kprobe(tk);
			ret = __register_trace_kprobe(tk);
			if (ret)
				pr_warn("Failed to re-register probe %s on %s: %d\n",
					trace_event_name(&tk->tp.call),
					mod->name, ret);
		}
	}
	mutex_unlock(&probe_lock);

	return NOTIFY_DONE;
}

static struct notifier_block trace_kprobe_module_nb = {
	.notifier_call = trace_kprobe_module_callback,
	.priority = 1	/* Invoked after kprobe module callback */
};

/* Convert certain expected symbols into '_' when generating event names */
static inline void sanitize_event_name(char *name)
{
	while (*name++ != '\0')
		if (*name == ':' || *name == '.')
			*name = '_';
}
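
/*
 * e.g. an auto-generated name such as "p_vfs.read_0" becomes "p_vfs_read_0",
 * since '.' and ':' are not valid in event names.
 */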

static int create_trace_kprobe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe:
	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe:
	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  $comm       : fetch current task comm
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
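	/*
	 * For example (following the samples in
	 * Documentation/trace/kprobetrace.txt; do_sys_open and the %ax/%dx
	 * register names are x86-specific illustrations):
	 *   p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
	 *   r:myretprobe do_sys_open $retval
	 */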
	struct trace_kprobe *tk;
	int i, ret = 0;
	bool is_return = false, is_delete = false;
	char *symbol = NULL, *event = NULL, *group = NULL;
	int maxactive = 0;
	char *arg;
	long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = false;
	else if (argv[0][0] == 'r')
		is_return = true;
	else if (argv[0][0] == '-')
		is_delete = true;
	else {
		pr_info("Probe definition must start with 'p', 'r' or '-'.\n");
		return -EINVAL;
	}

	event = strchr(&argv[0][1], ':');
	if (event) {
		event[0] = '\0';
		event++;
	}
	if (is_return && isdigit(argv[0][1])) {
		ret = kstrtouint(&argv[0][1], 0, &maxactive);
		if (ret) {
			pr_info("Failed to parse maxactive.\n");
			return ret;
		}
		/*
		 * kretprobe instances are iterated over via a list. The
		 * maximum should stay reasonable.
		 */
		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
			pr_info("Maxactive is too big (%d > %d).\n",
				maxactive, KRETPROBE_MAXACTIVE_MAX);
			return -E2BIG;
		}
	}

	if (event) {
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		mutex_lock(&probe_lock);
		tk = find_trace_kprobe(event, group);
		if (!tk) {
			mutex_unlock(&probe_lock);
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		ret = unregister_trace_kprobe(tk);
		if (ret == 0)
			free_trace_kprobe(tk);
		mutex_unlock(&probe_lock);
		return ret;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}

	/* try to parse an address. if that fails, try to read the
	 * input as a symbol. */
	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = traceprobe_split_symbol_offset(symbol, &offset);
		if (ret || offset < 0 || offset > UINT_MAX) {
			pr_info("Failed to parse either an address or a symbol.\n");
			return ret;
		}
		if (offset && is_return &&
		    !kprobe_on_func_entry(NULL, symbol, offset)) {
			pr_info("Given offset is not valid for return probe.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		sanitize_event_name(buf);
		event = buf;
	}
	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
			       argc, is_return);
	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return PTR_ERR(tk);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		/* Increment count for freeing args in error case */
		tk->tp.nr_args++;

		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg) {
			*arg++ = '\0';
			parg->name = kstrdup(argv[i], GFP_KERNEL);
		} else {
			arg = argv[i];
			/* If argument name is omitted, set "argN" */
			snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
			parg->name = kstrdup(buf, GFP_KERNEL);
		}

		if (!parg->name) {
			pr_info("Failed to allocate argument[%d] name.\n", i);
			ret = -ENOMEM;
			goto error;
		}

		if (!is_good_name(parg->name)) {
			pr_info("Invalid argument[%d] name: %s\n",
				i, parg->name);
			ret = -EINVAL;
			goto error;
		}

		if (traceprobe_conflict_field_name(parg->name,
						   tk->tp.args, i)) {
			pr_info("Argument[%d] name '%s' conflicts with another field.\n",
				i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
						 is_return, true,
						 kprobes_fetch_type_table);
		if (ret) {
			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
			goto error;
		}
	}

	ret = register_trace_kprobe(tk);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_kprobe(tk);
	return ret;
}

static int release_all_trace_kprobes(void)
{
	struct trace_kprobe *tk;
	int ret = 0;

	mutex_lock(&probe_lock);
	/* Ensure no probe is in use. */
	list_for_each_entry(tk, &probe_list, list)
		if (trace_probe_is_enabled(&tk->tp)) {
			ret = -EBUSY;
			goto end;
		}
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tk = list_entry(probe_list.next, struct trace_kprobe, list);
		ret = unregister_trace_kprobe(tk);
		if (ret)
			goto end;
		free_trace_kprobe(tk);
	}

end:
	mutex_unlock(&probe_lock);

	return ret;
}

/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	int i;

	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tk->tp.call.class->system,
			trace_event_name(&tk->tp.call));

	if (!tk->symbol)
		seq_printf(m, " 0x%p", tk->rp.kp.addr);
	else if (tk->rp.kp.offset)
		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
			   tk->rp.kp.offset);
	else
		seq_printf(m, " %s", trace_kprobe_symbol(tk));

	for (i = 0; i < tk->tp.nr_args; i++)
		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
	seq_putc(m, '\n');

	return 0;
}
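
/*
 * For symbol-based probes, the listing above mirrors the definition syntax
 * ("p:GRP/EVENT SYMBOL+offs arg=..."), matching the kprobe_events examples
 * in Documentation/trace/kprobetrace.txt.
 */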

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	int ret;

	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		ret = release_all_trace_kprobes();
		if (ret < 0)
			return ret;
	}

	return seq_open(file, &probes_seq_op);
}

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       create_trace_kprobe);
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;

	seq_printf(m, "  %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call),
		   trace_kprobe_nhit(tk),
		   tk->rp.kp.nmissed);

	return 0;
}
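
/*
 * Each kprobe_profile line shows the event name, the hit count (tk->nhit
 * summed over all CPUs) and the k*probe's nmissed count.
 */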

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
		    struct trace_event_file *trace_file)
{
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)tk->rp.kp.addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);

/* Kretprobe handler */
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		       struct pt_regs *regs,
		       struct trace_event_file *trace_file)
{
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct trace_event_call *call = &tk->tp.call;

	WARN_ON(call != trace_file->event_call);

	if (trace_trigger_soft_disabled(trace_file))
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(&tk->tp, regs);
	size = sizeof(*entry) + tk->tp.size + dsize;

	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						call->event.type,
						size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);

	event_trigger_unlock_commit_regs(trace_file, buffer, event,
					 entry, irq_flags, pc, regs);
}

static void
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		     struct pt_regs *regs)
{
	struct event_file_link *link;

	list_for_each_entry_rcu(link, &tk->tp.files, list)
		__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);

/* Event entry printers */
static enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');
 out:
	return trace_handle_return(s);
}

static enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_puts(s, " <- ");

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto out;

	trace_seq_putc(s, ')');

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto out;

	trace_seq_putc(s, '\n');

 out:
	return trace_handle_return(s);
}


static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tk->tp.nr_args; i++) {
		struct probe_arg *parg = &tk->tp.args[i];

		ret = trace_define_field(event_call, parg->type->fmttype,
					 parg->name,
					 sizeof(field) + parg->offset,
					 parg->type->size,
					 parg->type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call)) {
		unsigned long orig_ip = instruction_pointer(regs);
		int ret;

		ret = trace_call_bpf(call, regs);

		/*
		 * We need to check and see if we modified the pc of the
		 * pt_regs, and if so clear the kprobe and return 1 so that we
		 * don't do the single stepping.
		 * The ftrace kprobe handler leaves it up to us to re-enable
		 * preemption here before returning if we've modified the ip.
		 */
		if (orig_ip != instruction_pointer(regs)) {
			reset_current_kprobe();
			preempt_enable_no_resched();
			return 1;
		}
		if (!ret)
			return 0;
	}

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return 0;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return 0;

	entry->ip = (unsigned long)tk->rp.kp.addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
	return 0;
}
NOKPROBE_SYMBOL(kprobe_perf_func);
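
/*
 * Note: rounding __size + sizeof(u32) up to a u64 boundary and then
 * subtracting sizeof(u32) accounts for the u32 size header the perf core
 * places in front of the raw sample, keeping the record u64-aligned.
 */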

/* Kretprobe profile handler */
static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		    struct pt_regs *regs)
{
	struct trace_event_call *call = &tk->tp.call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
		return;

	head = this_cpu_ptr(call->perf_events);
	if (hlist_empty(head))
		return;

	dsize = __get_data_size(&tk->tp, regs);
	__size = sizeof(*entry) + tk->tp.size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tk->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
			      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);

int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **symbol, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_kprobe *tk;

	if (perf_type_tracepoint)
		tk = find_trace_kprobe(pevent, group);
	else
		tk = event->tp_event->data;
	if (!tk)
		return -EINVAL;

	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
					      : BPF_FD_TYPE_KPROBE;
	if (tk->symbol) {
		*symbol = tk->symbol;
		*probe_offset = tk->rp.kp.offset;
		*probe_addr = 0;
	} else {
		*symbol = NULL;
		*probe_offset = 0;
		*probe_addr = (unsigned long)tk->rp.kp.addr;
	}
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */

/*
 * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
 *
 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
 * lockless, but we can't race with this __init function.
 */
static int kprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_kprobe(tk, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_kprobe(tk, file);

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return enable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_UNREGISTER:
		return disable_trace_kprobe(tk, NULL);
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
	int ret = 0;

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		ret = kprobe_perf_func(tk, regs);
#endif
	return ret;
}
NOKPROBE_SYMBOL(kprobe_dispatcher);

static int
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tk->tp.flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
NOKPROBE_SYMBOL(kretprobe_dispatcher);

static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static inline void init_trace_event_call(struct trace_kprobe *tk,
					 struct trace_event_call *call)
{
	INIT_LIST_HEAD(&call->class->fields);
	if (trace_kprobe_is_return(tk)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}

	call->flags = TRACE_EVENT_FL_KPROBE;
	call->class->reg = kprobe_register;
	call->data = tk;
}

static int register_kprobe_event(struct trace_kprobe *tk)
{
	struct trace_event_call *call = &tk->tp.call;
	int ret = 0;

	init_trace_event_call(tk, call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
		return -ENOMEM;
	ret = register_trace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n",
			trace_event_name(call));
		kfree(call->print_fmt);
		unregister_trace_event(&call->event);
	}
	return ret;
}

static int unregister_kprobe_event(struct trace_kprobe *tk)
{
	int ret;

	/* tp->event is unregistered in trace_remove_event_call() */
	ret = trace_remove_event_call(&tk->tp.call);
	if (!ret)
		kfree(tk->tp.call.print_fmt);
	return ret;
}

#ifdef CONFIG_PERF_EVENTS
/* create a trace_kprobe, but don't add it to global lists */
struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return)
{
	struct trace_kprobe *tk;
	int ret;
	char *event;

	/*
	 * local trace_kprobes are not added to probe_list, so they are never
	 * searched in find_trace_kprobe(). Therefore, there is no concern of
	 * duplicated name here.
	 */
	event = func ? func : "DUMMY_EVENT";

	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
				offs, 0 /* maxactive */, 0 /* nargs */,
				is_return);

	if (IS_ERR(tk)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tk));
		return ERR_CAST(tk);
	}

	init_trace_event_call(tk, &tk->tp.call);

	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
		ret = -ENOMEM;
		goto error;
	}

	ret = __register_trace_kprobe(tk);
	if (ret < 0) {
		kfree(tk->tp.call.print_fmt);
		goto error;
	}

	return &tk->tp.call;
error:
	free_trace_kprobe(tk);
	return ERR_PTR(ret);
}

void destroy_local_trace_kprobe(struct trace_event_call *event_call)
{
	struct trace_kprobe *tk;

	tk = container_of(event_call, struct trace_kprobe, tp.call);

	if (trace_probe_is_enabled(&tk->tp)) {
		WARN_ON(1);
		return;
	}

	__unregister_trace_kprobe(tk);

	kfree(tk->tp.call.print_fmt);
	free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */

/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	if (register_module_notifier(&trace_kprobe_module_nb))
		return -EINVAL;

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	/* Event list interface */
	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_events' entry\n");

	/* Profile interface */
	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);


#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table. 'noinline' makes sure that there
 * isn't an inlined version used by the test method below
 */
static __used __init noinline int
kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}
1601
static __init struct trace_event_file *
1602
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
1603
{
1604
	struct trace_event_file *file;
1605 1606

	list_for_each_entry(file, &tr->events, list)
1607
		if (file->event_call == &tk->tp.call)
1608 1609 1610 1611 1612
			return file;

	return NULL;
}

/*
 * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
 * stage, we can do this lockless.
 */
static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_kprobe *tk;
	struct trace_event_file *file;

	if (tracing_is_disabled())
		return -ENODEV;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
				"$stack $stack0 +0($stack)",
				create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
				"$retval", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tk == NULL)) {
			pr_warn("error on getting 2nd new probe.\n");
			warn++;
		} else {
			file = find_trace_probe_file(tk, top_trace_array());
			if (WARN_ON_ONCE(file == NULL)) {
				pr_warn("error on getting probe file.\n");
				warn++;
			} else
				enable_trace_kprobe(tk, file);
		}
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	/*
	 * Not expecting an error here, the check is only to prevent the
	 * optimizer from removing the call to target() as otherwise there
	 * are no side-effects and the call is never performed.
	 */
	if (ret != 21)
		warn++;

	/* Disable trace points before removing them */
	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
	if (WARN_ON_ONCE(tk == NULL)) {
		pr_warn("error on getting 2nd test probe.\n");
		warn++;
	} else {
		if (trace_kprobe_nhit(tk) != 1) {
			pr_warn("incorrect number of testprobe2 hits\n");
			warn++;
		}

		file = find_trace_probe_file(tk, top_trace_array());
		if (WARN_ON_ONCE(file == NULL)) {
			pr_warn("error on getting probe file.\n");
			warn++;
		} else
			disable_trace_kprobe(tk, file);
	}

	ret = trace_run_command("-:testprobe", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

	ret = trace_run_command("-:testprobe2", create_trace_kprobe);
	if (WARN_ON_ONCE(ret)) {
		pr_warn("error on deleting a probe.\n");
		warn++;
	}

end:
	release_all_trace_kprobes();
	/*
	 * Wait for the optimizer work to finish. Otherwise it might fiddle
	 * with probes in already freed __init text.
	 */
	wait_for_kprobe_optimizer();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif