/*
 *  Kernel Probes (KProbes)
 *  arch/i386/kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation ( includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */

#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/uaccess.h>

void jprobe_return_end(void);

41 42 43
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

M
Masami Hiramatsu 已提交
44
/* insert a jmp code */
45
static __always_inline void set_jmp_op(void *from, void *to)
M
Masami Hiramatsu 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58
{
	struct __arch_jmp_op {
		char op;
		long raddr;
	} __attribute__((packed)) *jop;
	jop = (struct __arch_jmp_op *)from;
	jop->raddr = (long)(to) - ((long)(from) + 5);
	jop->op = RELATIVEJUMP_INSTRUCTION;
}

/*
 * returns non-zero if opcodes can be boosted.
 */
59
static __always_inline int can_boost(kprobe_opcode_t *opcodes)
M
Masami Hiramatsu 已提交
60
{
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
	/*
	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
	 * Groups, and some special opcodes can not be boost.
	 */
	static const unsigned long twobyte_is_boostable[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
		W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
		W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
		W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
		W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
		W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
		W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
		W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};
#undef W
	kprobe_opcode_t opcode;
	kprobe_opcode_t *orig_opcodes = opcodes;
retry:
	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
		return 0;
	opcode = *(opcodes++);

	/* 2nd-byte opcode */
	if (opcode == 0x0f) {
		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
			return 0;
		return test_bit(*opcodes, twobyte_is_boostable);
	}

	switch (opcode & 0xf0) {
	case 0x60:
		if (0x63 < opcode && opcode < 0x67)
			goto retry; /* prefixes */
		/* can't boost Address-size override and bound */
		return (opcode != 0x62 && opcode != 0x67);
M
Masami Hiramatsu 已提交
114 115 116
	case 0x70:
		return 0; /* can't boost conditional jump */
	case 0xc0:
117 118
		/* can't boost software-interruptions */
		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
M
Masami Hiramatsu 已提交
119 120 121 122
	case 0xd0:
		/* can boost AA* and XLAT */
		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
	case 0xe0:
123 124
		/* can boost in/out and absolute jmps */
		return ((opcode & 0x04) || opcode == 0xea);
M
Masami Hiramatsu 已提交
125
	case 0xf0:
126 127
		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
			goto retry; /* lock/rep(ne) prefix */
M
Masami Hiramatsu 已提交
128 129 130
		/* clear and set flags can be boost */
		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
	default:
131 132 133 134
		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
			goto retry; /* prefixes */
		/* can't boost CS override and call */
		return (opcode != 0x2e && opcode != 0x9a);
M
Masami Hiramatsu 已提交
135 136 137
	}
}

L
Linus Torvalds 已提交
138 139 140
/*
 * returns non-zero if opcode modifies the interrupt flag.
 */
141
static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
L
Linus Torvalds 已提交
142 143 144 145 146 147 148 149 150 151 152
{
	switch (opcode) {
	case 0xfa:		/* cli */
	case 0xfb:		/* sti */
	case 0xcf:		/* iret/iretd */
	case 0x9d:		/* popf/popfd */
		return 1;
	}
	return 0;
}

153
int __kprobes arch_prepare_kprobe(struct kprobe *p)
L
Linus Torvalds 已提交
154
{
155 156 157 158 159
	/* insn: must be on special executable page on i386. */
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;

L
Linus Torvalds 已提交
160
	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
161
	p->opcode = *p->addr;
162
	if (can_boost(p->addr)) {
M
Masami Hiramatsu 已提交
163 164 165 166
		p->ainsn.boostable = 0;
	} else {
		p->ainsn.boostable = -1;
	}
167
	return 0;
L
Linus Torvalds 已提交
168 169
}

170
void __kprobes arch_arm_kprobe(struct kprobe *p)
L
Linus Torvalds 已提交
171
{
172 173 174
	*p->addr = BREAKPOINT_INSTRUCTION;
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
L
Linus Torvalds 已提交
175 176
}

177
void __kprobes arch_disarm_kprobe(struct kprobe *p)
L
Linus Torvalds 已提交
178 179
{
	*p->addr = p->opcode;
180 181 182 183
	flush_icache_range((unsigned long) p->addr,
			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
}

184 185
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
I
Ingo Molnar 已提交
186
	mutex_lock(&kprobe_mutex);
187
	free_insn_slot(p->ainsn.insn);
I
Ingo Molnar 已提交
188
	mutex_unlock(&kprobe_mutex);
189 190
}

191
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
192
{
193 194 195 196
	kcb->prev_kprobe.kp = kprobe_running();
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
197 198
}

199
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
200
{
201 202 203 204
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
205 206
}

207
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
208
				struct kprobe_ctlblk *kcb)
209
{
210 211
	__get_cpu_var(current_kprobe) = p;
	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
212 213
		= (regs->eflags & (TF_MASK | IF_MASK));
	if (is_IF_modifier(p->opcode))
214
		kcb->kprobe_saved_eflags &= ~IF_MASK;
215 216
}

217
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
218 219 220 221 222 223 224
{
	regs->eflags |= TF_MASK;
	regs->eflags &= ~IF_MASK;
	/*single step inline if the instruction is an int3*/
	if (p->opcode == BREAKPOINT_INSTRUCTION)
		regs->eip = (unsigned long)p->addr;
	else
225
		regs->eip = (unsigned long)p->ainsn.insn;
L
Linus Torvalds 已提交
226 227
}

228
/* Called with kretprobe_lock held */
229 230
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
				      struct pt_regs *regs)
231 232
{
	unsigned long *sara = (unsigned long *)&regs->esp;
233

B
bibo,mao 已提交
234 235 236 237 238
	struct kretprobe_instance *ri;

	if ((ri = get_free_rp_inst(rp)) != NULL) {
		ri->rp = rp;
		ri->task = current;
239
		ri->ret_addr = (kprobe_opcode_t *) *sara;
240 241 242

		/* Replace the return addr with trampoline addr */
		*sara = (unsigned long) &kretprobe_trampoline;
B
bibo,mao 已提交
243 244 245 246
		add_rp_inst(ri);
	} else {
		rp->nmissed++;
	}
247 248
}

L
Linus Torvalds 已提交
249 250 251 252
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled thorough out this function.
 */
253
static int __kprobes kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
254 255 256
{
	struct kprobe *p;
	int ret = 0;
257
	kprobe_opcode_t *addr;
258 259
	struct kprobe_ctlblk *kcb;

260 261
	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));

262 263 264 265 266 267
	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();
L
Linus Torvalds 已提交
268 269 270 271 272

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
273
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
274
				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
L
Linus Torvalds 已提交
275
				regs->eflags &= ~TF_MASK;
276
				regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
277 278
				goto no_kprobe;
			}
279 280 281 282 283 284
			/* We have reentered the kprobe_handler(), since
			 * another probe was hit while within the handler.
			 * We here save the original kprobes variables and
			 * just single step on the instruction of the new probe
			 * without calling any user handlers.
			 */
285 286
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
287
			kprobes_inc_nmissed_count(p);
288
			prepare_singlestep(p, regs);
289
			kcb->kprobe_status = KPROBE_REENTER;
290
			return 1;
L
Linus Torvalds 已提交
291
		} else {
292 293 294 295 296 297 298 299 300
			if (*addr != BREAKPOINT_INSTRUCTION) {
			/* The breakpoint instruction was removed by
			 * another cpu right after we hit, no further
			 * handling of this interrupt is appropriate
			 */
				regs->eip -= sizeof(kprobe_opcode_t);
				ret = 1;
				goto no_kprobe;
			}
301
			p = __get_cpu_var(current_kprobe);
L
Linus Torvalds 已提交
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
			if (p->break_handler && p->break_handler(p, regs)) {
				goto ss_probe;
			}
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
318 319
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
L
Linus Torvalds 已提交
320
			 */
321
			regs->eip -= sizeof(kprobe_opcode_t);
L
Linus Torvalds 已提交
322 323 324 325 326 327
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

328 329
	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
L
Linus Torvalds 已提交
330 331 332 333 334

	if (p->pre_handler && p->pre_handler(p, regs))
		/* handler has already set things up, so skip ss setup */
		return 1;

335
ss_probe:
336 337
#ifndef CONFIG_PREEMPT
	if (p->ainsn.boostable == 1 && !p->post_handler){
M
Masami Hiramatsu 已提交
338 339 340 341 342 343
		/* Boost up -- we can execute copied instructions directly */
		reset_current_kprobe();
		regs->eip = (unsigned long)p->ainsn.insn;
		preempt_enable_no_resched();
		return 1;
	}
344
#endif
L
Linus Torvalds 已提交
345
	prepare_singlestep(p, regs);
346
	kcb->kprobe_status = KPROBE_HIT_SS;
L
Linus Torvalds 已提交
347 348 349
	return 1;

no_kprobe:
350
	preempt_enable_no_resched();
L
Linus Torvalds 已提交
351 352 353
	return ret;
}

354 355 356 357 358
/*
 * For function-return probes, init_kprobes() establishes a probepoint
 * here. When a retprobed function returns, this probe is hit and
 * trampoline_probe_handler() runs, calling the kretprobe's handler.
 */
359
 void __kprobes kretprobe_trampoline_holder(void)
360
 {
361
	asm volatile ( ".global kretprobe_trampoline\n"
B
bibo,mao 已提交
362
			"kretprobe_trampoline: \n"
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
			"	pushf\n"
			/* skip cs, eip, orig_eax, es, ds */
			"	subl $20, %esp\n"
			"	pushl %eax\n"
			"	pushl %ebp\n"
			"	pushl %edi\n"
			"	pushl %esi\n"
			"	pushl %edx\n"
			"	pushl %ecx\n"
			"	pushl %ebx\n"
			"	movl %esp, %eax\n"
			"	call trampoline_handler\n"
			/* move eflags to cs */
			"	movl 48(%esp), %edx\n"
			"	movl %edx, 44(%esp)\n"
			/* save true return address on eflags */
			"	movl %eax, 48(%esp)\n"
			"	popl %ebx\n"
			"	popl %ecx\n"
			"	popl %edx\n"
			"	popl %esi\n"
			"	popl %edi\n"
			"	popl %ebp\n"
			"	popl %eax\n"
			/* skip eip, orig_eax, es, ds */
			"	addl $16, %esp\n"
			"	popf\n"
			"	ret\n");
}
392 393

/*
394
 * Called from kretprobe_trampoline
395
 */
396
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
397
{
B
bibo,mao 已提交
398 399 400
	struct kretprobe_instance *ri = NULL;
	struct hlist_head *head;
	struct hlist_node *node, *tmp;
401
	unsigned long flags, orig_ret_address = 0;
402
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
403

404
	spin_lock_irqsave(&kretprobe_lock, flags);
B
bibo,mao 已提交
405
	head = kretprobe_inst_table_head(current);
406

407 408 409 410 411 412 413 414 415
	/*
	 * It is possible to have multiple instances associated with a given
	 * task either because an multiple functions in the call path
	 * have a return probe installed on them, and/or more then one return
	 * return probe was registered for a target function.
	 *
	 * We can handle this because:
	 *     - instances are always inserted at the head of the list
	 *     - when multiple return probes are registered for the same
B
bibo,mao 已提交
416
	 *       function, the first instance's ret_addr will point to the
417 418 419 420
	 *       real return address, and all the rest will point to
	 *       kretprobe_trampoline
	 */
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
B
bibo,mao 已提交
421
		if (ri->task != current)
422
			/* another task is sharing our hash bucket */
B
bibo,mao 已提交
423
			continue;
424

425 426
		if (ri->rp && ri->rp->handler){
			__get_cpu_var(current_kprobe) = &ri->rp->kp;
427
			ri->rp->handler(ri, regs);
428 429
			__get_cpu_var(current_kprobe) = NULL;
		}
430 431

		orig_ret_address = (unsigned long)ri->ret_addr;
432
		recycle_rp_inst(ri);
433 434 435 436 437 438 439 440

		if (orig_ret_address != trampoline_address)
			/*
			 * This is the real return address. Any other
			 * instances associated with this task are for
			 * other calls deeper on the call stack
			 */
			break;
441
	}
442 443 444

	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));

445
	spin_unlock_irqrestore(&kretprobe_lock, flags);
446

447
	return (void*)orig_ret_address;
448 449
}

L
Linus Torvalds 已提交
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new eip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed eflags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
M
Masami Hiramatsu 已提交
471 472
 *
 * This function also checks instruction size for preparing direct execution.
L
Linus Torvalds 已提交
473
 */
474 475
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
L
Linus Torvalds 已提交
476 477
{
	unsigned long *tos = (unsigned long *)&regs->esp;
478
	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
L
Linus Torvalds 已提交
479 480
	unsigned long orig_eip = (unsigned long)p->addr;

481
	regs->eflags &= ~TF_MASK;
L
Linus Torvalds 已提交
482 483 484
	switch (p->ainsn.insn[0]) {
	case 0x9c:		/* pushfl */
		*tos &= ~(TF_MASK | IF_MASK);
485
		*tos |= kcb->kprobe_old_eflags;
L
Linus Torvalds 已提交
486
		break;
487 488
	case 0xc2:		/* iret/ret/lret */
	case 0xc3:
489
	case 0xca:
490 491
	case 0xcb:
	case 0xcf:
492 493
	case 0xea:		/* jmp absolute -- eip is correct */
		/* eip is already adjusted, no more changes required */
M
Masami Hiramatsu 已提交
494
		p->ainsn.boostable = 1;
495
		goto no_change;
L
Linus Torvalds 已提交
496 497 498
	case 0xe8:		/* call relative - Fix return addr */
		*tos = orig_eip + (*tos - copy_eip);
		break;
499 500 501
	case 0x9a:		/* call absolute -- same as call absolute, indirect */
		*tos = orig_eip + (*tos - copy_eip);
		goto no_change;
L
Linus Torvalds 已提交
502 503
	case 0xff:
		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
M
Masami Hiramatsu 已提交
504
			/*
505
			 * call absolute, indirect
M
Masami Hiramatsu 已提交
506 507 508
			 * Fix return addr; eip is correct.
			 * But this is not boostable
			 */
L
Linus Torvalds 已提交
509
			*tos = orig_eip + (*tos - copy_eip);
510
			goto no_change;
L
Linus Torvalds 已提交
511 512
		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
M
Masami Hiramatsu 已提交
513 514
			/* eip is correct. And this is boostable */
			p->ainsn.boostable = 1;
515
			goto no_change;
L
Linus Torvalds 已提交
516 517 518 519 520
		}
	default:
		break;
	}

M
Masami Hiramatsu 已提交
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535
	if (p->ainsn.boostable == 0) {
		if ((regs->eip > copy_eip) &&
		    (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
			/*
			 * These instructions can be executed directly if it
			 * jumps back to correct address.
			 */
			set_jmp_op((void *)regs->eip,
				   (void *)orig_eip + (regs->eip - copy_eip));
			p->ainsn.boostable = 1;
		} else {
			p->ainsn.boostable = -1;
		}
	}

536 537 538 539
	regs->eip = orig_eip + (regs->eip - copy_eip);

no_change:
	return;
L
Linus Torvalds 已提交
540 541 542 543
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
544
 * remain disabled thoroughout this function.
L
Linus Torvalds 已提交
545
 */
546
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
547
{
548 549 550 551
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
L
Linus Torvalds 已提交
552 553
		return 0;

554 555 556
	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
557
	}
L
Linus Torvalds 已提交
558

559 560
	resume_execution(cur, regs, kcb);
	regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
561

562
	/*Restore back the original saved kprobes variables and continue. */
563 564
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
565 566
		goto out;
	}
567
	reset_current_kprobe();
568
out:
L
Linus Torvalds 已提交
569 570 571 572 573 574 575 576 577 578 579 580 581
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, eflags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (regs->eflags & TF_MASK)
		return 0;

	return 1;
}

582
static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
L
Linus Torvalds 已提交
583
{
584 585 586
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

587 588 589 590 591 592 593 594 595 596 597
	switch(kcb->kprobe_status) {
	case KPROBE_HIT_SS:
	case KPROBE_REENTER:
		/*
		 * We are here because the instruction being single
		 * stepped caused a page fault. We reset the current
		 * kprobe and the eip points back to the probe address
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->eip = (unsigned long)cur->addr;
598
		regs->eflags |= kcb->kprobe_old_eflags;
599 600 601 602
		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
L
Linus Torvalds 已提交
603
		preempt_enable_no_resched();
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
		break;
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SSDONE:
		/*
		 * We increment the nmissed count for accounting,
		 * we can also use npre/npostfault count for accouting
		 * these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page_fault, this could happen
		 * if handler tries to access user space by
		 * copy_from_user(), get_user() etc. Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		if (fixup_exception(regs))
			return 1;

		/*
		 * fixup_exception() could not handle it,
		 * Let do_page_fault() fix it.
		 */
		break;
	default:
		break;
L
Linus Torvalds 已提交
638 639 640 641 642 643 644
	}
	return 0;
}

/*
 * Wrapper routine to for handling exceptions.
 */
645 646
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
L
Linus Torvalds 已提交
647 648
{
	struct die_args *args = (struct die_args *)data;
649 650
	int ret = NOTIFY_DONE;

651
	if (args->regs && user_mode_vm(args->regs))
652 653
		return ret;

L
Linus Torvalds 已提交
654 655 656
	switch (val) {
	case DIE_INT3:
		if (kprobe_handler(args->regs))
657
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
658 659 660
		break;
	case DIE_DEBUG:
		if (post_kprobe_handler(args->regs))
661
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
662 663 664
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
665 666
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
L
Linus Torvalds 已提交
667 668
		if (kprobe_running() &&
		    kprobe_fault_handler(args->regs, args->trapnr))
669
			ret = NOTIFY_STOP;
670
		preempt_enable();
L
Linus Torvalds 已提交
671 672 673 674
		break;
	default:
		break;
	}
675
	return ret;
L
Linus Torvalds 已提交
676 677
}

678
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
679 680 681
{
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	unsigned long addr;
682
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
L
Linus Torvalds 已提交
683

684 685 686
	kcb->jprobe_saved_regs = *regs;
	kcb->jprobe_saved_esp = &regs->esp;
	addr = (unsigned long)(kcb->jprobe_saved_esp);
L
Linus Torvalds 已提交
687 688 689 690 691 692 693 694

	/*
	 * TBD: As Linus pointed out, gcc assumes that the callee
	 * owns the argument space and could overwrite it, e.g.
	 * tailcall optimization. So, to be absolutely safe
	 * we also save and restore enough stack bytes to cover
	 * the argument area.
	 */
695 696
	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
			MIN_STACK_SIZE(addr));
L
Linus Torvalds 已提交
697 698 699 700 701
	regs->eflags &= ~IF_MASK;
	regs->eip = (unsigned long)(jp->entry);
	return 1;
}

702
void __kprobes jprobe_return(void)
L
Linus Torvalds 已提交
703
{
704 705
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

L
Linus Torvalds 已提交
706 707 708 709 710
	asm volatile ("       xchgl   %%ebx,%%esp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"::"b"
711
		      (kcb->jprobe_saved_esp):"memory");
L
Linus Torvalds 已提交
712 713
}

714
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
715
{
716
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
L
Linus Torvalds 已提交
717
	u8 *addr = (u8 *) (regs->eip - 1);
718
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
L
Linus Torvalds 已提交
719 720 721
	struct jprobe *jp = container_of(p, struct jprobe, kp);

	if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
722
		if (&regs->esp != kcb->jprobe_saved_esp) {
L
Linus Torvalds 已提交
723
			struct pt_regs *saved_regs =
724 725
			    container_of(kcb->jprobe_saved_esp,
					    struct pt_regs, esp);
L
Linus Torvalds 已提交
726
			printk("current esp %p does not match saved esp %p\n",
727
			       &regs->esp, kcb->jprobe_saved_esp);
L
Linus Torvalds 已提交
728 729 730 731 732 733
			printk("Saved registers for jprobe %p\n", jp);
			show_registers(saved_regs);
			printk("Current registers\n");
			show_registers(regs);
			BUG();
		}
734 735
		*regs = kcb->jprobe_saved_regs;
		memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
L
Linus Torvalds 已提交
736
		       MIN_STACK_SIZE(stack_addr));
737
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
738 739 740 741
		return 1;
	}
	return 0;
}
742

743
int __init arch_init_kprobes(void)
744
{
745
	return 0;
746
}