/*
 * Kprobes-based tracing events
 *
 * Created by Masami Hiramatsu <mhiramat@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/ptrace.h>
#include <linux/perf_event.h>
#include <linux/stringify.h>
#include <linux/limits.h>
#include <asm/bitsperlong.h>

#include "trace.h"
#include "trace_output.h"

#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
#define MAX_STRING_SIZE PATH_MAX
#define KPROBE_EVENT_SYSTEM "kprobes"

/* Reserved field names */
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"

const char *reserved_field_names[] = {
	"common_type",
	"common_flags",
	"common_preempt_count",
	"common_pid",
	"common_tgid",
	"common_lock_depth",
	FIELD_STRING_IP,
	FIELD_STRING_RETIP,
	FIELD_STRING_FUNC,
};

/* Printing function type */
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
				 void *);
#define PRINT_TYPE_FUNC_NAME(type)	print_type_##type
#define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type

/* Printing function template for basic types */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\
static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\
						const char *name,	\
						void *data, void *ent)\
{									\
	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
}									\
static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;

DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
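
/*
 * For reference, a minimal sketch of what one of the lines above expands
 * to (illustrative only, derived from the macros):
 *
 *	static __kprobes int print_type_u32(struct trace_seq *s,
 *					    const char *name,
 *					    void *data, void *ent)
 *	{
 *		return trace_seq_printf(s, " %s=%lx", name,
 *					(unsigned long)*(u32 *)data);
 *	}
 *	static const char print_type_format_u32[] = "%lx";
 */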

/* data_rloc: data relative location, compatible with u32 */
#define make_data_rloc(len, roffs)	\
	(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl)	((u32)(dl) >> 16)
#define get_rloc_offs(dl)	((u32)(dl) & 0xffff)

static inline void *get_rloc_data(u32 *dl)
{
	return (u8 *)dl + get_rloc_offs(*dl);
}

/* For data_loc conversion */
static inline void *get_loc_data(u32 *dl, void *ent)
{
	return (u8 *)ent + get_rloc_offs(*dl);
}

/*
 * Convert data_rloc to data_loc:
 *  data_rloc stores the offset from data_rloc itself, but data_loc
 *  stores the offset from event entry.
 */
#define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs))
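
/*
 * Worked example of the encoding above (illustrative values): a 7-byte
 * string stored 24 bytes after the data_rloc word gives
 *	make_data_rloc(7, 24)      == 0x00070018
 *	get_rloc_len(0x00070018)   == 7
 *	get_rloc_offs(0x00070018)  == 24
 * and convert_rloc_to_loc() then rebases that offset so it is relative
 * to the event entry instead of the data_rloc word itself.
 */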

/* For defining macros, define string/string_size types */
typedef u32 string;
typedef u32 string_size;

/* Print type function for string type */
static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
						  const char *name,
						  void *data, void *ent)
{
	int len = *(u32 *)data >> 16;

	if (!len)
		return trace_seq_printf(s, " %s=(fault)", name);
	else
		return trace_seq_printf(s, " %s=\"%s\"", name,
					(const char *)get_loc_data(data, ent));
}
static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";

/* Data fetch function type */
typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *);

struct fetch_param {
	fetch_func_t	fn;
	void *data;
};

static __kprobes void call_fetch(struct fetch_param *fprm,
				 struct pt_regs *regs, void *dest)
{
	return fprm->fn(regs, fprm->data, dest);
}

#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type
/*
 * Define macro for basic types - we don't need to define s* types, because
 * we have to care only about bitwidth at recording time.
 */
#define DEFINE_BASIC_FETCH_FUNCS(method) \
DEFINE_FETCH_##method(u8)		\
DEFINE_FETCH_##method(u16)		\
DEFINE_FETCH_##method(u32)		\
DEFINE_FETCH_##method(u64)

#define CHECK_FETCH_FUNCS(method, fn)			\
	(((FETCH_FUNC_NAME(method, u8) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u16) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u32) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u64) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string_size) == fn)) \
	 && (fn != NULL))

/* Data fetch function templates */
#define DEFINE_FETCH_reg(type)						\
static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\
					void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_register(regs,			\
				(unsigned int)((unsigned long)offset));	\
}
DEFINE_BASIC_FETCH_FUNCS(reg)
/* No string on the register */
#define fetch_reg_string NULL
#define fetch_reg_string_size NULL

#define DEFINE_FETCH_stack(type)					\
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
					  void *offset, void *dest)	\
{									\
	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
				(unsigned int)((unsigned long)offset));	\
}
DEFINE_BASIC_FETCH_FUNCS(stack)
/* No string on the stack entry */
#define fetch_stack_string NULL
#define fetch_stack_string_size NULL

#define DEFINE_FETCH_retval(type)					\
static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
					  void *dummy, void *dest)	\
{									\
	*(type *)dest = (type)regs_return_value(regs);			\
}
DEFINE_BASIC_FETCH_FUNCS(retval)
/* No string on the retval */
#define fetch_retval_string NULL
#define fetch_retval_string_size NULL

#define DEFINE_FETCH_memory(type)					\
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
					  void *addr, void *dest)	\
{									\
	type retval;							\
	if (probe_kernel_address(addr, retval))				\
		*(type *)dest = 0;					\
	else								\
		*(type *)dest = retval;					\
}
DEFINE_BASIC_FETCH_FUNCS(memory)
/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
 * length and relative data location.
 */
static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
						      void *addr, void *dest)
{
	long ret;
	int maxlen = get_rloc_len(*(u32 *)dest);
	u8 *dst = get_rloc_data(dest);
	u8 *src = addr;
	mm_segment_t old_fs = get_fs();
	if (!maxlen)
		return;
	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	set_fs(KERNEL_DS);
	pagefault_disable();
	do
		ret = __copy_from_user_inatomic(dst++, src++, 1);
	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
	dst[-1] = '\0';
	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0) {	/* Failed to fetch string */
		((u8 *)get_rloc_data(dest))[0] = '\0';
		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
	} else
		*(u32 *)dest = make_data_rloc(src - (u8 *)addr,
					      get_rloc_offs(*(u32 *)dest));
}
/* Return the length of string -- including null terminal byte */
static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
							void *addr, void *dest)
{
	int ret, len = 0;
	u8 c;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	pagefault_disable();
	do {
		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);
	pagefault_enable();
	set_fs(old_fs);

	if (ret < 0)	/* Failed to check the length */
		*(u32 *)dest = 0;
	else
		*(u32 *)dest = len;
}

/* Memory fetching by symbol */
struct symbol_cache {
	char *symbol;
	long offset;
	unsigned long addr;
};

static unsigned long update_symbol_cache(struct symbol_cache *sc)
{
	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
	if (sc->addr)
		sc->addr += sc->offset;
	return sc->addr;
}

static void free_symbol_cache(struct symbol_cache *sc)
{
	kfree(sc->symbol);
	kfree(sc);
}

static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
{
	struct symbol_cache *sc;

	if (!sym || strlen(sym) == 0)
		return NULL;
	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->symbol = kstrdup(sym, GFP_KERNEL);
	if (!sc->symbol) {
		kfree(sc);
		return NULL;
	}
	sc->offset = offset;

	update_symbol_cache(sc);
	return sc;
}

#define DEFINE_FETCH_symbol(type)					\
static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
					  void *data, void *dest)	\
{									\
	struct symbol_cache *sc = data;					\
	if (sc->addr)							\
		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
	else								\
		*(type *)dest = 0;					\
}
DEFINE_BASIC_FETCH_FUNCS(symbol)
DEFINE_FETCH_symbol(string)
DEFINE_FETCH_symbol(string_size)

/* Dereference memory access function */
struct deref_fetch_param {
	struct fetch_param orig;
	long offset;
};

#define DEFINE_FETCH_deref(type)					\
static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
					    void *data, void *dest)	\
{									\
	struct deref_fetch_param *dprm = data;				\
	unsigned long addr;						\
	call_fetch(&dprm->orig, regs, &addr);				\
	if (addr) {							\
		addr += dprm->offset;					\
		fetch_memory_##type(regs, (void *)addr, dest);		\
	} else								\
		*(type *)dest = 0;					\
}
DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
DEFINE_FETCH_deref(string_size)

static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
		free_deref_fetch_param(data->orig.data);
	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
		free_symbol_cache(data->orig.data);
	kfree(data);
}

/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)

/* Fetch types */
enum {
	FETCH_MTD_reg = 0,
	FETCH_MTD_stack,
	FETCH_MTD_retval,
	FETCH_MTD_memory,
	FETCH_MTD_symbol,
	FETCH_MTD_deref,
	FETCH_MTD_END,
};

#define ASSIGN_FETCH_FUNC(method, type)	\
	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)

#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
	{.name = _name,				\
	 .size = _size,					\
	 .is_signed = sign,				\
	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\
	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\
	 .fmttype = _fmttype,				\
	 .fetch = {					\
ASSIGN_FETCH_FUNC(reg, ftype),				\
ASSIGN_FETCH_FUNC(stack, ftype),			\
ASSIGN_FETCH_FUNC(retval, ftype),			\
ASSIGN_FETCH_FUNC(memory, ftype),			\
ASSIGN_FETCH_FUNC(symbol, ftype),			\
ASSIGN_FETCH_FUNC(deref, ftype),			\
	  }						\
	}

#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)

#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1

/* Fetch type information table */
static const struct fetch_type {
	const char	*name;		/* Name of type */
	size_t		size;		/* Byte size of type */
	int		is_signed;	/* Signed flag */
	print_type_func_t	print;	/* Print functions */
	const char	*fmt;		/* Format string */
	const char	*fmttype;	/* Name in format file */
	/* Fetch functions */
	fetch_func_t	fetch[FETCH_MTD_END];
} fetch_type_table[] = {
	/* Special types */
	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
					sizeof(u32), 1, "__data_loc char[]"),
	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
					string_size, sizeof(u32), 0, "u32"),
	/* Basic types */
	ASSIGN_FETCH_TYPE(u8,  u8,  0),
	ASSIGN_FETCH_TYPE(u16, u16, 0),
	ASSIGN_FETCH_TYPE(u32, u32, 0),
	ASSIGN_FETCH_TYPE(u64, u64, 0),
	ASSIGN_FETCH_TYPE(s8,  u8,  1),
	ASSIGN_FETCH_TYPE(s16, u16, 1),
	ASSIGN_FETCH_TYPE(s32, u32, 1),
	ASSIGN_FETCH_TYPE(s64, u64, 1),
};

static const struct fetch_type *find_fetch_type(const char *type)
{
	int i;

	if (!type)
		type = DEFAULT_FETCH_TYPE_STR;

	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
		if (strcmp(type, fetch_type_table[i].name) == 0)
			return &fetch_type_table[i];
	return NULL;
}

/* Special function : only accept unsigned long */
static __kprobes void fetch_stack_address(struct pt_regs *regs,
					  void *dummy, void *dest)
{
	*(unsigned long *)dest = kernel_stack_pointer(regs);
}

static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
					    fetch_func_t orig_fn)
{
	int i;

	if (type != &fetch_type_table[FETCH_TYPE_STRING])
		return NULL;	/* Only string type needs size function */
	for (i = 0; i < FETCH_MTD_END; i++)
		if (type->fetch[i] == orig_fn)
			return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];

	WARN_ON(1);	/* This should not happen */
	return NULL;
}

/**
 * Kprobe event core functions
 */

struct probe_arg {
	struct fetch_param	fetch;
	struct fetch_param	fetch_size;
	unsigned int		offset;	/* Offset from argument entry */
	const char		*name;	/* Name of this argument */
	const char		*comm;	/* Command of this argument */
	const struct fetch_type	*type;	/* Type of this argument */
};

/* Flags for trace_probe */
#define TP_FLAG_TRACE	1
#define TP_FLAG_PROFILE	2

struct trace_probe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;
	unsigned int		flags;	/* For TP_FLAG_* */
	const char		*symbol;	/* symbol name */
	struct ftrace_event_class	class;
	struct ftrace_event_call	call;
	ssize_t			size;		/* trace entry size */
	unsigned int		nr_args;
	struct probe_arg	args[];
};

#define SIZEOF_TRACE_PROBE(n)			\
	(offsetof(struct trace_probe, args) +	\
	(sizeof(struct probe_arg) * (n)))

static __kprobes int probe_is_return(struct trace_probe *tp)
{
	return tp->rp.handler != NULL;
}

static __kprobes const char *probe_symbol(struct trace_probe *tp)
{
	return tp->symbol ? tp->symbol : "unknown";
}

static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);

/* Check the name is good for event/group */
static int check_event_name(const char *name)
{
	if (!isalpha(*name) && *name != '_')
		return 0;
	while (*++name != '\0') {
		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
			return 0;
	}
	return 1;
}
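
/*
 * Per the rules above, names such as "myprobe2" and "_probe" are
 * accepted, while "2probe" and "my-probe" are rejected (illustrative
 * examples only).
 */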

/*
 * Allocate new trace_probe and initialize it (including kprobes).
 */
static struct trace_probe *alloc_trace_probe(const char *group,
					     const char *event,
					     void *addr,
					     const char *symbol,
					     unsigned long offs,
					     int nargs, int is_return)
{
	struct trace_probe *tp;
	int ret = -ENOMEM;

	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(ret);

	if (symbol) {
		tp->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tp->symbol)
			goto error;
		tp->rp.kp.symbol_name = tp->symbol;
		tp->rp.kp.offset = offs;
	} else
		tp->rp.kp.addr = addr;

	if (is_return)
		tp->rp.handler = kretprobe_dispatcher;
	else
		tp->rp.kp.pre_handler = kprobe_dispatcher;

	if (!event || !check_event_name(event)) {
		ret = -EINVAL;
		goto error;
	}

	tp->call.class = &tp->class;
	tp->call.name = kstrdup(event, GFP_KERNEL);
	if (!tp->call.name)
		goto error;

	if (!group || !check_event_name(group)) {
		ret = -EINVAL;
		goto error;
	}

	tp->class.system = kstrdup(group, GFP_KERNEL);
	if (!tp->class.system)
		goto error;

	INIT_LIST_HEAD(&tp->list);
	return tp;
error:
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
	return ERR_PTR(ret);
}

static void free_probe_arg(struct probe_arg *arg)
{
	if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
		free_deref_fetch_param(arg->fetch.data);
	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
		free_symbol_cache(arg->fetch.data);
	kfree(arg->name);
	kfree(arg->comm);
}

static void free_trace_probe(struct trace_probe *tp)
{
	int i;

	for (i = 0; i < tp->nr_args; i++)
		free_probe_arg(&tp->args[i]);

	kfree(tp->call.class->system);
	kfree(tp->call.name);
	kfree(tp->symbol);
	kfree(tp);
}

static struct trace_probe *find_probe_event(const char *event,
					    const char *group)
{
	struct trace_probe *tp;

	list_for_each_entry(tp, &probe_list, list)
		if (strcmp(tp->call.name, event) == 0 &&
		    strcmp(tp->call.class->system, group) == 0)
			return tp;
	return NULL;
}

/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe(struct trace_probe *tp)
{
	if (probe_is_return(tp))
		unregister_kretprobe(&tp->rp);
	else
		unregister_kprobe(&tp->rp.kp);
	list_del(&tp->list);
	unregister_probe_event(tp);
}

/* Register a trace_probe and probe_event */
static int register_trace_probe(struct trace_probe *tp)
{
	struct trace_probe *old_tp;
	int ret;

	mutex_lock(&probe_lock);

	/* register as an event */
	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
	if (old_tp) {
		/* delete old event */
		unregister_trace_probe(old_tp);
		free_trace_probe(old_tp);
	}
	ret = register_probe_event(tp);
	if (ret) {
		pr_warning("Failed to register probe event(%d)\n", ret);
		goto end;
	}

	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
	if (probe_is_return(tp))
		ret = register_kretprobe(&tp->rp);
	else
		ret = register_kprobe(&tp->rp.kp);

	if (ret) {
		pr_warning("Could not insert probe(%d)\n", ret);
		if (ret == -EILSEQ) {
			pr_warning("Probing address(0x%p) is not an "
				   "instruction boundary.\n",
				   tp->rp.kp.addr);
			ret = -EINVAL;
		}
		unregister_probe_event(tp);
	} else
		list_add_tail(&tp->list, &probe_list);
end:
	mutex_unlock(&probe_lock);
	return ret;
}

/* Split symbol and offset. */
static int split_symbol_offset(char *symbol, unsigned long *offset)
{
	char *tmp;
	int ret;

	if (!offset)
		return -EINVAL;

	tmp = strchr(symbol, '+');
	if (tmp) {
		/* skip sign because strict_strtol doesn't accept '+' */
		ret = strict_strtoul(tmp + 1, 0, offset);
		if (ret)
			return ret;
		*tmp = '\0';
	} else
		*offset = 0;
	return 0;
}
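
/*
 * For example (illustrative), "vfs_read+8" is split into the symbol
 * "vfs_read" and offset 8, while a plain "vfs_read" yields offset 0.
 * Only '+' offsets are recognized at this point.
 */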

#define PARAM_MAX_ARGS 16
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))

static int parse_probe_vars(char *arg, const struct fetch_type *t,
			    struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;

	if (strcmp(arg, "retval") == 0) {
		if (is_return)
			f->fn = t->fetch[FETCH_MTD_retval];
		else
			ret = -EINVAL;
	} else if (strncmp(arg, "stack", 5) == 0) {
		if (arg[5] == '\0') {
			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
				f->fn = fetch_stack_address;
			else
				ret = -EINVAL;
		} else if (isdigit(arg[5])) {
			ret = strict_strtoul(arg + 5, 10, &param);
			if (ret || param > PARAM_MAX_STACK)
				ret = -EINVAL;
			else {
				f->fn = t->fetch[FETCH_MTD_stack];
				f->data = (void *)param;
			}
		} else
			ret = -EINVAL;
	} else
		ret = -EINVAL;
	return ret;
}

/* Recursive argument parser */
static int __parse_probe_arg(char *arg, const struct fetch_type *t,
			     struct fetch_param *f, int is_return)
{
	int ret = 0;
	unsigned long param;
	long offset;
	char *tmp;

	switch (arg[0]) {
	case '$':
		ret = parse_probe_vars(arg + 1, t, f, is_return);
		break;
	case '%':	/* named register */
		ret = regs_query_register_offset(arg + 1);
		if (ret >= 0) {
			f->fn = t->fetch[FETCH_MTD_reg];
			f->data = (void *)(unsigned long)ret;
			ret = 0;
		}
		break;
	case '@':	/* memory or symbol */
		if (isdigit(arg[1])) {
			ret = strict_strtoul(arg + 1, 0, &param);
			if (ret)
				break;
			f->fn = t->fetch[FETCH_MTD_memory];
			f->data = (void *)param;
		} else {
			ret = split_symbol_offset(arg + 1, &offset);
			if (ret)
				break;
			f->data = alloc_symbol_cache(arg + 1, offset);
			if (f->data)
				f->fn = t->fetch[FETCH_MTD_symbol];
		}
		break;
	case '+':	/* deref memory */
	case '-':
		tmp = strchr(arg, '(');
		if (!tmp)
			break;
		*tmp = '\0';
		ret = strict_strtol(arg + 1, 0, &offset);
		if (ret)
			break;
		if (arg[0] == '-')
			offset = -offset;
		arg = tmp + 1;
		tmp = strrchr(arg, ')');
		if (tmp) {
			struct deref_fetch_param *dprm;
			const struct fetch_type *t2 = find_fetch_type(NULL);
			*tmp = '\0';
			dprm = kzalloc(sizeof(struct deref_fetch_param),
				       GFP_KERNEL);
			if (!dprm)
				return -ENOMEM;
			dprm->offset = offset;
			ret = __parse_probe_arg(arg, t2, &dprm->orig,
						is_return);
			if (ret)
				kfree(dprm);
			else {
				f->fn = t->fetch[FETCH_MTD_deref];
				f->data = (void *)dprm;
			}
		}
		break;
	}
	if (!ret && !f->fn) {	/* Parsed, but no fetch method found */
		pr_info("%s type has no corresponding fetch method.\n",
			t->name);
		ret = -EINVAL;
	}
	return ret;
}

/* String length checking wrapper */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
			   struct probe_arg *parg, int is_return)
{
	const char *t;
	int ret;

	if (strlen(arg) > MAX_ARGSTR_LEN) {
		pr_info("Argument is too long: %s\n", arg);
		return -ENOSPC;
	}
	parg->comm = kstrdup(arg, GFP_KERNEL);
	if (!parg->comm) {
		pr_info("Failed to allocate memory for command '%s'.\n", arg);
		return -ENOMEM;
	}
	t = strchr(parg->comm, ':');
	if (t) {
		arg[t - parg->comm] = '\0';
		t++;
	}
	parg->type = find_fetch_type(t);
	if (!parg->type) {
		pr_info("Unsupported type: %s\n", t);
		return -EINVAL;
	}
	parg->offset = tp->size;
	tp->size += parg->type->size;
	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
	if (ret >= 0) {
		parg->fetch_size.fn = get_fetch_size_function(parg->type,
							      parg->fetch.fn);
		parg->fetch_size.data = parg->fetch.data;
	}
	return ret;
}

/* Return 1 if name is reserved or already used by another argument */
static int conflict_field_name(const char *name,
			       struct probe_arg *args, int narg)
{
	int i;
	for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
		if (strcmp(reserved_field_names[i], name) == 0)
			return 1;
	for (i = 0; i < narg; i++)
		if (strcmp(args[i].name, name) == 0)
			return 1;
	return 0;
}

static int create_trace_probe(int argc, char **argv)
{
	/*
	 * Argument syntax:
	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
	 * Fetch args:
	 *  $retval	: fetch return value
	 *  $stack	: fetch stack address
	 *  $stackN	: fetch Nth of stack (N:0-)
	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
	 *  %REG	: fetch register REG
	 * Dereferencing memory fetch:
	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
	 * Alias name of args:
	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
	 * Type of args:
	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
	 */
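	/*
	 * Usage example (illustrative; register names are arch-specific):
	 *
	 *   echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx' > \
	 *		/sys/kernel/debug/tracing/kprobe_events
	 *
	 * defines a "kprobes/myprobe" event that records three argument
	 * registers at each entry to do_sys_open().
	 */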
	struct trace_probe *tp;
	int i, ret = 0;
	int is_return = 0, is_delete = 0;
	char *symbol = NULL, *event = NULL, *group = NULL;
	char *arg, *tmp;
	unsigned long offset = 0;
	void *addr = NULL;
	char buf[MAX_EVENT_NAME_LEN];

	/* argc must be >= 1 */
	if (argv[0][0] == 'p')
		is_return = 0;
	else if (argv[0][0] == 'r')
		is_return = 1;
	else if (argv[0][0] == '-')
		is_delete = 1;
	else {
		pr_info("Probe definition must start with 'p', 'r' or"
			" '-'.\n");
		return -EINVAL;
	}

	if (argv[0][1] == ':') {
		event = &argv[0][2];
		if (strchr(event, '/')) {
			group = event;
			event = strchr(group, '/') + 1;
			event[-1] = '\0';
			if (strlen(group) == 0) {
				pr_info("Group name is not specified\n");
				return -EINVAL;
			}
		}
		if (strlen(event) == 0) {
			pr_info("Event name is not specified\n");
			return -EINVAL;
		}
	}
	if (!group)
		group = KPROBE_EVENT_SYSTEM;

	if (is_delete) {
		if (!event) {
			pr_info("Delete command needs an event name.\n");
			return -EINVAL;
		}
		tp = find_probe_event(event, group);
		if (!tp) {
			pr_info("Event %s/%s doesn't exist.\n", group, event);
			return -ENOENT;
		}
		/* delete an event */
		unregister_trace_probe(tp);
		free_trace_probe(tp);
		return 0;
	}

	if (argc < 2) {
		pr_info("Probe point is not specified.\n");
		return -EINVAL;
	}
	if (isdigit(argv[1][0])) {
		if (is_return) {
			pr_info("Return probe point must be a symbol.\n");
			return -EINVAL;
		}
		/* an address specified */
		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
		if (ret) {
			pr_info("Failed to parse address.\n");
			return ret;
		}
	} else {
		/* a symbol specified */
		symbol = argv[1];
		/* TODO: support .init module functions */
		ret = split_symbol_offset(symbol, &offset);
		if (ret) {
			pr_info("Failed to parse symbol.\n");
			return ret;
		}
		if (offset && is_return) {
			pr_info("Return probe must be used without offset.\n");
			return -EINVAL;
		}
	}
	argc -= 2; argv += 2;

	/* setup a probe */
	if (!event) {
		/* Make a new event name */
		if (symbol)
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
				 is_return ? 'r' : 'p', symbol, offset);
		else
			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
				 is_return ? 'r' : 'p', addr);
		event = buf;
	}
	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
			       is_return);
	if (IS_ERR(tp)) {
		pr_info("Failed to allocate trace_probe.(%d)\n",
			(int)PTR_ERR(tp));
		return PTR_ERR(tp);
	}

	/* parse arguments */
	ret = 0;
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		/* Parse argument name */
		arg = strchr(argv[i], '=');
		if (arg)
			*arg++ = '\0';
		else
			arg = argv[i];

		tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
		if (!tp->args[i].name) {
			pr_info("Failed to allocate argument%d name '%s'.\n",
				i, argv[i]);
			ret = -ENOMEM;
			goto error;
		}
		tmp = strchr(tp->args[i].name, ':');
		if (tmp)
			*tmp = '_';	/* convert : to _ */

		if (conflict_field_name(tp->args[i].name, tp->args, i)) {
			pr_info("Argument%d name '%s' conflicts with "
				"another field.\n", i, argv[i]);
			ret = -EINVAL;
			goto error;
		}

		/* Parse fetch argument */
		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
		if (ret) {
			pr_info("Parse error at argument%d. (%d)\n", i, ret);
			kfree(tp->args[i].name);
			goto error;
		}

		tp->nr_args++;
	}

	ret = register_trace_probe(tp);
	if (ret)
		goto error;
	return 0;

error:
	free_trace_probe(tp);
	return ret;
}

static void cleanup_all_probes(void)
{
	struct trace_probe *tp;

	mutex_lock(&probe_lock);
	/* TODO: Use batch unregistration */
	while (!list_empty(&probe_list)) {
		tp = list_entry(probe_list.next, struct trace_probe, list);
		unregister_trace_probe(tp);
		free_trace_probe(tp);
	}
	mutex_unlock(&probe_lock);
}


/* Probes listing interfaces */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, *pos);
}

static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &probe_list, pos);
}

static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}

static int probes_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;
	int i;

	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);

	if (!tp->symbol)
		seq_printf(m, " 0x%p", tp->rp.kp.addr);
	else if (tp->rp.kp.offset)
		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
	else
		seq_printf(m, " %s", probe_symbol(tp));

	for (i = 0; i < tp->nr_args; i++)
		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
	seq_printf(m, "\n");

	return 0;
}

static const struct seq_operations probes_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_seq_show
};

static int probes_open(struct inode *inode, struct file *file)
{
	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		cleanup_all_probes();

	return seq_open(file, &probes_seq_op);
}

static int command_trace_probe(const char *buf)
{
	char **argv;
	int argc = 0, ret = 0;

	argv = argv_split(GFP_KERNEL, buf, &argc);
	if (!argv)
		return -ENOMEM;

	if (argc)
		ret = create_trace_probe(argc, argv);

	argv_free(argv);
	return ret;
}

#define WRITE_BUFSIZE 128

static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	char *kbuf, *tmp;
	int ret;
	size_t done;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = done = 0;
	while (done < count) {
		size = count - done;
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;
		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		tmp = strchr(kbuf, '\n');
		if (tmp) {
			*tmp = '\0';
			size = tmp - kbuf + 1;
		} else if (done + size < count) {
			pr_warning("Line is too long: "
				   "should be less than %d.", WRITE_BUFSIZE);
			ret = -EINVAL;
			goto out;
		}
		done += size;
		/* Remove comments */
		tmp = strchr(kbuf, '#');
		if (tmp)
			*tmp = '\0';

		ret = command_trace_probe(kbuf);
		if (ret)
			goto out;
	}
	ret = done;
out:
	kfree(kbuf);
	return ret;
}

static const struct file_operations kprobe_events_ops = {
	.owner          = THIS_MODULE,
	.open           = probes_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
	.write		= probes_write,
};

/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_probe *tp = v;

	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
		   tp->rp.kp.nmissed);

	return 0;
}

static const struct seq_operations profile_seq_op = {
	.start  = probes_seq_start,
	.next   = probes_seq_next,
	.stop   = probes_seq_stop,
	.show   = probes_profile_seq_show
};

static int profile_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &profile_seq_op);
}

static const struct file_operations kprobe_profile_ops = {
	.owner          = THIS_MODULE,
	.open           = profile_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release,
};

/* Sum up total data length for dynamic arrays (strings) */
static __kprobes int __get_data_size(struct trace_probe *tp,
				     struct pt_regs *regs)
{
	int i, ret = 0;
	u32 len;

	for (i = 0; i < tp->nr_args; i++)
		if (unlikely(tp->args[i].fetch_size.fn)) {
			call_fetch(&tp->args[i].fetch_size, regs, &len);
			ret += len;
		}

	return ret;
}

/* Store the value of each argument */
static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
				       struct pt_regs *regs,
				       u8 *data, int maxlen)
{
	int i;
	u32 end = tp->size;
	u32 *dl;	/* Data (relative) location */

	for (i = 0; i < tp->nr_args; i++) {
		if (unlikely(tp->args[i].fetch_size.fn)) {
			/*
			 * First, we set the relative location and
			 * maximum data length to *dl
			 */
			dl = (u32 *)(data + tp->args[i].offset);
			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
			/* Then try to fetch string or dynamic array data */
			call_fetch(&tp->args[i].fetch, regs, dl);
			/* Reduce maximum length */
			end += get_rloc_len(*dl);
			maxlen -= get_rloc_len(*dl);
			/* Trick here, convert data_rloc to data_loc */
			*dl = convert_rloc_to_loc(*dl,
				 ent_size + tp->args[i].offset);
		} else
			/* Just fetching data normally */
			call_fetch(&tp->args[i].fetch, regs,
				   data + tp->args[i].offset);
	}
}
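
/*
 * Resulting record layout (sketch): the fixed-size argument block comes
 * first and any string data is appended after it, referenced through a
 * data_loc word inside the fixed block:
 *
 *	[entry header][ arg1 | arg2 | ... | argN ][ string data ]
 *	               <------- tp->size -------> <--- dsize --->
 */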

/* Kprobe handler */
static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct kprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, dsize, pc;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	tp->nhit++;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = (unsigned long)kp->addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}

/* Kretprobe handler */
static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
					  struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct kretprobe_trace_entry_head *entry;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size, pc, dsize;
	unsigned long irq_flags;
	struct ftrace_event_call *call = &tp->call;

	local_save_flags(irq_flags);
	pc = preempt_count();

	dsize = __get_data_size(tp, regs);
	size = sizeof(*entry) + tp->size + dsize;

	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
						  size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	if (!filter_current_check_discard(buffer, call, entry, event))
		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
}

/* Event entry printers */
enum print_line_t
print_kprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct kprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

enum print_line_t
print_kretprobe_event(struct trace_iterator *iter, int flags,
		      struct trace_event *event)
{
	struct kretprobe_trace_entry_head *field;
	struct trace_seq *s = &iter->seq;
	struct trace_probe *tp;
	u8 *data;
	int i;

	field = (struct kretprobe_trace_entry_head *)iter->ent;
	tp = container_of(event, struct trace_probe, call.event);

	if (!trace_seq_printf(s, "%s: (", tp->call.name))
		goto partial;

	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, " <- "))
		goto partial;

	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
		goto partial;

	if (!trace_seq_puts(s, ")"))
		goto partial;

	data = (u8 *)&field[1];
	for (i = 0; i < tp->nr_args; i++)
		if (!tp->args[i].type->print(s, tp->args[i].name,
					     data + tp->args[i].offset, field))
			goto partial;

	if (!trace_seq_puts(s, "\n"))
		goto partial;

	return TRACE_TYPE_HANDLED;
partial:
	return TRACE_TYPE_PARTIAL_LINE;
}

static int probe_event_enable(struct ftrace_event_call *call)
{
	struct trace_probe *tp = (struct trace_probe *)call->data;

	tp->flags |= TP_FLAG_TRACE;
	if (probe_is_return(tp))
		return enable_kretprobe(&tp->rp);
	else
		return enable_kprobe(&tp->rp.kp);
}

static void probe_event_disable(struct ftrace_event_call *call)
{
	struct trace_probe *tp = (struct trace_probe *)call->data;

	tp->flags &= ~TP_FLAG_TRACE;
	if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
		if (probe_is_return(tp))
			disable_kretprobe(&tp->rp);
		else
			disable_kprobe(&tp->rp.kp);
	}
}

#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
					 offsetof(typeof(field), item),	\
					 sizeof(field.item), is_signed, \
					 FILTER_OTHER);			\
		if (ret)						\
			return ret;					\
	} while (0)

static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
	int ret, i;
	struct kretprobe_trace_entry_head field;
	struct trace_probe *tp = (struct trace_probe *)event_call->data;

	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
	/* Set argument names as fields */
	for (i = 0; i < tp->nr_args; i++) {
		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
					 tp->args[i].name,
					 sizeof(field) + tp->args[i].offset,
					 tp->args[i].type->size,
					 tp->args[i].type->is_signed,
					 FILTER_OTHER);
		if (ret)
			return ret;
	}
	return 0;
}

static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
	int i;
	int pos = 0;

	const char *fmt, *arg;

	if (!probe_is_return(tp)) {
		fmt = "(%lx)";
		arg = "REC->" FIELD_STRING_IP;
	} else {
		fmt = "(%lx <- %lx)";
		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
	}

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);

	for (i = 0; i < tp->nr_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
				tp->args[i].name, tp->args[i].type->fmt);
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);

	for (i = 0; i < tp->nr_args; i++) {
		if (strcmp(tp->args[i].type->name, "string") == 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", __get_str(%s)",
					tp->args[i].name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
					tp->args[i].name);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}

static int set_print_fmt(struct trace_probe *tp)
{
	int len;
	char *print_fmt;

	/* First: called with 0 length to calculate the needed length */
	len = __set_print_fmt(tp, NULL, 0);
	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_print_fmt(tp, print_fmt, len + 1);
	tp->call.print_fmt = print_fmt;

	return 0;
}
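
/*
 * As an illustration (hypothetical probe), "p:myprobe vfs_read count=%cx:u32"
 * ends up with a print_fmt of
 *
 *	"(%lx) count=%lx", REC->__probe_ip, REC->count
 *
 * which is what shows up in the event's format file.
 */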

#ifdef CONFIG_PERF_EVENTS

/* Kprobe profile handler */
static __kprobes void kprobe_perf_func(struct kprobe *kp,
					 struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	struct ftrace_event_call *call = &tp->call;
	struct kprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->ip = (unsigned long)kp->addr;
	memset(&entry[1], 0, dsize);
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
}

/* Kretprobe profile handler */
static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
					    struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
	struct ftrace_event_call *call = &tp->call;
	struct kretprobe_trace_entry_head *entry;
	struct hlist_head *head;
	int size, __size, dsize;
	int rctx;

	dsize = __get_data_size(tp, regs);
	__size = sizeof(*entry) + tp->size + dsize;
	size = ALIGN(__size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		     "profile buffer not large enough"))
		return;

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->func = (unsigned long)tp->rp.kp.addr;
	entry->ret_ip = (unsigned long)ri->ret_addr;
	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
}

static int probe_perf_enable(struct ftrace_event_call *call)
{
	struct trace_probe *tp = (struct trace_probe *)call->data;

	tp->flags |= TP_FLAG_PROFILE;

	if (probe_is_return(tp))
		return enable_kretprobe(&tp->rp);
	else
		return enable_kprobe(&tp->rp.kp);
}

static void probe_perf_disable(struct ftrace_event_call *call)
{
	struct trace_probe *tp = (struct trace_probe *)call->data;

	tp->flags &= ~TP_FLAG_PROFILE;

	if (!(tp->flags & TP_FLAG_TRACE)) {
		if (probe_is_return(tp))
			disable_kretprobe(&tp->rp);
		else
			disable_kprobe(&tp->rp.kp);
	}
}
#endif	/* CONFIG_PERF_EVENTS */

static __kprobes
int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return probe_event_enable(event);
	case TRACE_REG_UNREGISTER:
		probe_event_disable(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return probe_perf_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		probe_perf_disable(event);
		return 0;
#endif
	}
	return 0;
}
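
/*
 * kprobe_register() is wired up as call->class->reg in
 * register_probe_event(), so the event core uses this single callback to
 * switch both ftrace and perf tracing on and off for a probe.
 */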

static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
	if (tp->flags & TP_FLAG_TRACE)
		kprobe_trace_func(kp, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kprobe_perf_func(kp, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}

static __kprobes
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

	if (tp->flags & TP_FLAG_TRACE)
		kretprobe_trace_func(ri, regs);
#ifdef CONFIG_PERF_EVENTS
	if (tp->flags & TP_FLAG_PROFILE)
		kretprobe_perf_func(ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
}
static struct trace_event_functions kretprobe_funcs = {
	.trace		= print_kretprobe_event
};

static struct trace_event_functions kprobe_funcs = {
	.trace		= print_kprobe_event
};

static int register_probe_event(struct trace_probe *tp)
{
	struct ftrace_event_call *call = &tp->call;
	int ret;

	/* Initialize ftrace_event_call */
	INIT_LIST_HEAD(&call->class->fields);
	if (probe_is_return(tp)) {
		call->event.funcs = &kretprobe_funcs;
		call->class->define_fields = kretprobe_event_define_fields;
	} else {
		call->event.funcs = &kprobe_funcs;
		call->class->define_fields = kprobe_event_define_fields;
	}
	if (set_print_fmt(tp) < 0)
		return -ENOMEM;
	ret = register_ftrace_event(&call->event);
	if (!ret) {
		kfree(call->print_fmt);
		return -ENODEV;
	}
	call->flags = 0;
	call->class->reg = kprobe_register;
	call->data = tp;
	ret = trace_add_event_call(call);
	if (ret) {
		pr_info("Failed to register kprobe event: %s\n", call->name);
		kfree(call->print_fmt);
		unregister_ftrace_event(&call->event);
	}
	return ret;
}

static void unregister_probe_event(struct trace_probe *tp)
{
	/* tp->event is unregistered in trace_remove_event_call() */
	trace_remove_event_call(&tp->call);
	kfree(tp->call.print_fmt);
}

/* Make a debugfs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
	struct dentry *d_tracer;
	struct dentry *entry;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
				    NULL, &kprobe_events_ops);

	/* Event list interface */
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_events' entry\n");

	/* Profile interface */
	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
				    NULL, &kprobe_profile_ops);

	if (!entry)
		pr_warning("Could not create debugfs "
			   "'kprobe_profile' entry\n");
	return 0;
}
fs_initcall(init_kprobe_trace);
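
/*
 * With debugfs mounted in the usual place, the files created above show
 * up as /sys/kernel/debug/tracing/kprobe_events (probe definitions) and
 * /sys/kernel/debug/tracing/kprobe_profile (per-probe hit and miss
 * counts).
 */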


#ifdef CONFIG_FTRACE_STARTUP_TEST

static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	return a1 + a2 + a3 + a4 + a5 + a6;
}

static __init int kprobe_trace_self_tests_init(void)
{
	int ret, warn = 0;
	int (*target)(int, int, int, int, int, int);
	struct trace_probe *tp;

	target = kprobe_trace_selftest_target;

	pr_info("Testing kprobe tracing: ");

	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
				  "$stack $stack0 +0($stack)");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function entry.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
				  "$retval");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on probing function return.\n");
		warn++;
	} else {
		/* Enable trace point */
		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
		if (WARN_ON_ONCE(tp == NULL)) {
			pr_warning("error on getting new probe.\n");
			warn++;
		} else
			probe_event_enable(&tp->call);
	}

	if (warn)
		goto end;

	ret = target(1, 2, 3, 4, 5, 6);

	ret = command_trace_probe("-:testprobe");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

	ret = command_trace_probe("-:testprobe2");
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error on deleting a probe.\n");
		warn++;
	}

end:
	cleanup_all_probes();
	if (warn)
		pr_cont("NG: Some tests failed. Please check them.\n");
	else
		pr_cont("OK\n");
	return 0;
}

late_initcall(kprobe_trace_self_tests_init);

#endif