kprobes.c 21.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 *  Kernel Probes (KProbes)
 *  arch/i386/kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation ( includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
26 27 28
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
L
Linus Torvalds 已提交
29 30 31 32 33
 */

#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
34
#include <asm/cacheflush.h>
L
Linus Torvalds 已提交
35 36
#include <asm/kdebug.h>
#include <asm/desc.h>
37
#include <asm/uaccess.h>
L
Linus Torvalds 已提交
38 39 40

void jprobe_return_end(void);

41 42 43
/*
 * Per-CPU pointer to a kprobe, initialised to NULL. It is written by
 * set_current_kprobe(), restore_previous_kprobe() and trampoline_handler()
 * in this file.
 */
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
/*
 * Per-CPU kprobe control block: holds the status, saved eflags, the
 * previous-probe snapshot and the jprobe save area used below.
 */
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

M
Masami Hiramatsu 已提交
44
/* insert a jmp code */
45
static __always_inline void set_jmp_op(void *from, void *to)
M
Masami Hiramatsu 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58
{
	struct __arch_jmp_op {
		char op;
		long raddr;
	} __attribute__((packed)) *jop;
	jop = (struct __arch_jmp_op *)from;
	jop->raddr = (long)(to) - ((long)(from) + 5);
	jop->op = RELATIVEJUMP_INSTRUCTION;
}

/*
 * returns non-zero if opcodes can be boosted.
 */
59
static __always_inline int can_boost(kprobe_opcode_t *opcodes)
M
Masami Hiramatsu 已提交
60
{
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
	/*
	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
	 * Groups, and some special opcodes can not be boost.
	 */
	static const unsigned long twobyte_is_boostable[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
		W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
		W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
		W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
		W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
		W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
		W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
		W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};
#undef W
	kprobe_opcode_t opcode;
	kprobe_opcode_t *orig_opcodes = opcodes;
retry:
	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
		return 0;
	opcode = *(opcodes++);

	/* 2nd-byte opcode */
	if (opcode == 0x0f) {
		if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
			return 0;
		return test_bit(*opcodes, twobyte_is_boostable);
	}

	switch (opcode & 0xf0) {
	case 0x60:
		if (0x63 < opcode && opcode < 0x67)
			goto retry; /* prefixes */
		/* can't boost Address-size override and bound */
		return (opcode != 0x62 && opcode != 0x67);
M
Masami Hiramatsu 已提交
114 115 116
	case 0x70:
		return 0; /* can't boost conditional jump */
	case 0xc0:
117 118
		/* can't boost software-interruptions */
		return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
M
Masami Hiramatsu 已提交
119 120 121 122
	case 0xd0:
		/* can boost AA* and XLAT */
		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
	case 0xe0:
123 124
		/* can boost in/out and absolute jmps */
		return ((opcode & 0x04) || opcode == 0xea);
M
Masami Hiramatsu 已提交
125
	case 0xf0:
126 127
		if ((opcode & 0x0c) == 0 && opcode != 0xf1)
			goto retry; /* lock/rep(ne) prefix */
M
Masami Hiramatsu 已提交
128 129 130
		/* clear and set flags can be boost */
		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
	default:
131 132 133 134
		if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
			goto retry; /* prefixes */
		/* can't boost CS override and call */
		return (opcode != 0x2e && opcode != 0x9a);
M
Masami Hiramatsu 已提交
135 136 137
	}
}

L
Linus Torvalds 已提交
138 139 140
/*
 * returns non-zero if opcode modifies the interrupt flag.
 */
141
static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
L
Linus Torvalds 已提交
142 143 144 145 146 147 148 149 150 151 152
{
	switch (opcode) {
	case 0xfa:		/* cli */
	case 0xfb:		/* sti */
	case 0xcf:		/* iret/iretd */
	case 0x9d:		/* popf/popfd */
		return 1;
	}
	return 0;
}

153
/*
 * Prepare probe @p: obtain an instruction slot, copy MAX_INSN_SIZE opcodes
 * from the probed address into it, and remember the first original opcode.
 *
 * Returns 0 on success, or -ENOMEM if no instruction slot is available.
 */
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	/* insn: must be on special executable page on i386. */
	p->ainsn.insn = get_insn_slot();
	if (p->ainsn.insn == NULL)
		return -ENOMEM;

	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	p->opcode = *p->addr;

	/*
	 * 0 marks a boost candidate (resume_execution() makes the final
	 * decision after the first single-step); -1 means never boost.
	 */
	p->ainsn.boostable = can_boost(p->addr) ? 0 : -1;

	return 0;
}

170
/*
 * Arm probe @p: overwrite the first opcode at the probed address with
 * BREAKPOINT_INSTRUCTION and flush the instruction cache for that opcode.
 */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	unsigned long start = (unsigned long) p->addr;

	*p->addr = BREAKPOINT_INSTRUCTION;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

177
/*
 * Disarm probe @p: put the saved original opcode back at the probed
 * address and flush the instruction cache for that opcode.
 */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	unsigned long start = (unsigned long) p->addr;

	*p->addr = p->opcode;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

184 185
/*
 * Release the instruction slot owned by probe @p. The slot is freed while
 * holding kprobe_mutex; the second argument to free_insn_slot() tells it
 * whether the probe had been boosted (boostable == 1).
 */
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
	int was_boosted;

	mutex_lock(&kprobe_mutex);
	was_boosted = (p->ainsn.boostable == 1);
	free_insn_slot(p->ainsn.insn, was_boosted);
	mutex_unlock(&kprobe_mutex);
}

191
/*
 * Snapshot the currently running kprobe and its status/eflags bookkeeping
 * into kcb->prev_kprobe, so restore_previous_kprobe() can bring it back.
 */
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	/* The four stores are independent of one another. */
	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.kp = kprobe_running();
}

199
/*
 * Undo save_previous_kprobe(): make the saved kprobe current again on this
 * CPU and restore its status and eflags bookkeeping from kcb->prev_kprobe.
 */
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	/* The four stores are independent of one another. */
	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
}

207
/*
 * Make @p the current kprobe on this CPU and record the TF/IF bits of the
 * interrupted context in @kcb.
 *
 * kprobe_old_eflags keeps both bits as found in @regs. kprobe_saved_eflags
 * starts out the same, but IF is dropped from it when the probed opcode is
 * itself an interrupt-flag modifier (see is_IF_modifier()).
 */
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
				struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = p;

	kcb->kprobe_old_eflags = (regs->eflags & (TF_MASK | IF_MASK));
	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags;

	if (is_IF_modifier(p->opcode))
		kcb->kprobe_saved_eflags &= ~IF_MASK;
}

217
/*
 * Set up @regs so that exactly one instruction is single-stepped on return
 * from the trap: TF is set, IF is cleared, and eip is pointed at the
 * instruction to execute.
 */
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
	regs->eflags = (regs->eflags | TF_MASK) & ~IF_MASK;

	if (p->opcode != BREAKPOINT_INSTRUCTION) {
		/* Normal case: step the out-of-line copy. */
		regs->eip = (unsigned long)p->ainsn.insn;
	} else {
		/* The original instruction is an int3: single step it inline. */
		regs->eip = (unsigned long)p->addr;
	}
}

228
/* Called with kretprobe_lock held */
229 230
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
				      struct pt_regs *regs)
231 232
{
	unsigned long *sara = (unsigned long *)&regs->esp;
233

B
bibo,mao 已提交
234 235 236 237 238
	struct kretprobe_instance *ri;

	if ((ri = get_free_rp_inst(rp)) != NULL) {
		ri->rp = rp;
		ri->task = current;
239
		ri->ret_addr = (kprobe_opcode_t *) *sara;
240 241 242

		/* Replace the return addr with trampoline addr */
		*sara = (unsigned long) &kretprobe_trampoline;
B
bibo,mao 已提交
243 244 245 246
		add_rp_inst(ri);
	} else {
		rp->nmissed++;
	}
247 248
}

L
Linus Torvalds 已提交
249 250 251 252
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled thorough out this function.
 */
253
static int __kprobes kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
254 255 256
{
	struct kprobe *p;
	int ret = 0;
257
	kprobe_opcode_t *addr;
258 259
	struct kprobe_ctlblk *kcb;

260 261
	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));

262 263 264 265 266 267
	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();
L
Linus Torvalds 已提交
268 269 270 271 272

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
273
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
274
				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
L
Linus Torvalds 已提交
275
				regs->eflags &= ~TF_MASK;
276
				regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
277 278
				goto no_kprobe;
			}
279 280 281 282 283 284
			/* We have reentered the kprobe_handler(), since
			 * another probe was hit while within the handler.
			 * We here save the original kprobes variables and
			 * just single step on the instruction of the new probe
			 * without calling any user handlers.
			 */
285 286
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
287
			kprobes_inc_nmissed_count(p);
288
			prepare_singlestep(p, regs);
289
			kcb->kprobe_status = KPROBE_REENTER;
290
			return 1;
L
Linus Torvalds 已提交
291
		} else {
292 293 294 295 296 297 298 299 300
			if (*addr != BREAKPOINT_INSTRUCTION) {
			/* The breakpoint instruction was removed by
			 * another cpu right after we hit, no further
			 * handling of this interrupt is appropriate
			 */
				regs->eip -= sizeof(kprobe_opcode_t);
				ret = 1;
				goto no_kprobe;
			}
301
			p = __get_cpu_var(current_kprobe);
L
Linus Torvalds 已提交
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
			if (p->break_handler && p->break_handler(p, regs)) {
				goto ss_probe;
			}
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
318 319
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
L
Linus Torvalds 已提交
320
			 */
321
			regs->eip -= sizeof(kprobe_opcode_t);
L
Linus Torvalds 已提交
322 323 324 325 326 327
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

328 329
	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
L
Linus Torvalds 已提交
330 331 332 333 334

	if (p->pre_handler && p->pre_handler(p, regs))
		/* handler has already set things up, so skip ss setup */
		return 1;

335
ss_probe:
336
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
337
	if (p->ainsn.boostable == 1 && !p->post_handler){
M
Masami Hiramatsu 已提交
338 339 340 341 342 343
		/* Boost up -- we can execute copied instructions directly */
		reset_current_kprobe();
		regs->eip = (unsigned long)p->ainsn.insn;
		preempt_enable_no_resched();
		return 1;
	}
344
#endif
L
Linus Torvalds 已提交
345
	prepare_singlestep(p, regs);
346
	kcb->kprobe_status = KPROBE_HIT_SS;
L
Linus Torvalds 已提交
347 348 349
	return 1;

no_kprobe:
350
	preempt_enable_no_resched();
L
Linus Torvalds 已提交
351 352 353
	return ret;
}

354 355 356 357 358
/*
 * For function-return probes, init_kprobes() establishes a probepoint
 * here. When a retprobed function returns, this probe is hit and
 * trampoline_probe_handler() runs, calling the kretprobe's handler.
 */
359
 void __kprobes kretprobe_trampoline_holder(void)
360
 {
361
	asm volatile ( ".global kretprobe_trampoline\n"
B
bibo,mao 已提交
362
			"kretprobe_trampoline: \n"
363
			"	pushf\n"
364 365
			/* skip cs, eip, orig_eax */
			"	subl $12, %esp\n"
366
			"	pushl %fs\n"
367 368
			"	pushl %ds\n"
			"	pushl %es\n"
369 370 371 372 373 374 375 376 377 378
			"	pushl %eax\n"
			"	pushl %ebp\n"
			"	pushl %edi\n"
			"	pushl %esi\n"
			"	pushl %edx\n"
			"	pushl %ecx\n"
			"	pushl %ebx\n"
			"	movl %esp, %eax\n"
			"	call trampoline_handler\n"
			/* move eflags to cs */
379 380
			"	movl 52(%esp), %edx\n"
			"	movl %edx, 48(%esp)\n"
381
			/* save true return address on eflags */
382
			"	movl %eax, 52(%esp)\n"
383 384 385 386 387 388 389
			"	popl %ebx\n"
			"	popl %ecx\n"
			"	popl %edx\n"
			"	popl %esi\n"
			"	popl %edi\n"
			"	popl %ebp\n"
			"	popl %eax\n"
390
			/* skip eip, orig_eax, es, ds, fs */
391
			"	addl $20, %esp\n"
392 393 394
			"	popf\n"
			"	ret\n");
}
395 396

/*
397
 * Called from kretprobe_trampoline
398
 */
399
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
400
{
B
bibo,mao 已提交
401
	struct kretprobe_instance *ri = NULL;
402
	struct hlist_head *head, empty_rp;
B
bibo,mao 已提交
403
	struct hlist_node *node, *tmp;
404
	unsigned long flags, orig_ret_address = 0;
405
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
406

407
	INIT_HLIST_HEAD(&empty_rp);
408
	spin_lock_irqsave(&kretprobe_lock, flags);
B
bibo,mao 已提交
409
	head = kretprobe_inst_table_head(current);
410
	/* fixup registers */
Z
Zachary Amsden 已提交
411
	regs->xcs = __KERNEL_CS | get_kernel_rpl();
412 413
	regs->eip = trampoline_address;
	regs->orig_eax = 0xffffffff;
414

415 416 417 418 419 420 421 422 423
	/*
	 * It is possible to have multiple instances associated with a given
	 * task either because an multiple functions in the call path
	 * have a return probe installed on them, and/or more then one return
	 * return probe was registered for a target function.
	 *
	 * We can handle this because:
	 *     - instances are always inserted at the head of the list
	 *     - when multiple return probes are registered for the same
B
bibo,mao 已提交
424
	 *       function, the first instance's ret_addr will point to the
425 426 427 428
	 *       real return address, and all the rest will point to
	 *       kretprobe_trampoline
	 */
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
B
bibo,mao 已提交
429
		if (ri->task != current)
430
			/* another task is sharing our hash bucket */
B
bibo,mao 已提交
431
			continue;
432

433 434
		if (ri->rp && ri->rp->handler){
			__get_cpu_var(current_kprobe) = &ri->rp->kp;
435
			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
436
			ri->rp->handler(ri, regs);
437 438
			__get_cpu_var(current_kprobe) = NULL;
		}
439 440

		orig_ret_address = (unsigned long)ri->ret_addr;
441
		recycle_rp_inst(ri, &empty_rp);
442 443 444 445 446 447 448 449

		if (orig_ret_address != trampoline_address)
			/*
			 * This is the real return address. Any other
			 * instances associated with this task are for
			 * other calls deeper on the call stack
			 */
			break;
450
	}
451 452 453

	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));

454
	spin_unlock_irqrestore(&kretprobe_lock, flags);
455

456 457 458 459
	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}
460
	return (void*)orig_ret_address;
461 462
}

L
Linus Torvalds 已提交
463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new eip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed eflags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
M
Masami Hiramatsu 已提交
484 485
 *
 * This function also checks instruction size for preparing direct execution.
L
Linus Torvalds 已提交
486
 */
487 488
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
L
Linus Torvalds 已提交
489 490
{
	unsigned long *tos = (unsigned long *)&regs->esp;
491
	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
L
Linus Torvalds 已提交
492 493
	unsigned long orig_eip = (unsigned long)p->addr;

494
	regs->eflags &= ~TF_MASK;
L
Linus Torvalds 已提交
495 496 497
	switch (p->ainsn.insn[0]) {
	case 0x9c:		/* pushfl */
		*tos &= ~(TF_MASK | IF_MASK);
498
		*tos |= kcb->kprobe_old_eflags;
L
Linus Torvalds 已提交
499
		break;
500 501
	case 0xc2:		/* iret/ret/lret */
	case 0xc3:
502
	case 0xca:
503 504
	case 0xcb:
	case 0xcf:
505 506
	case 0xea:		/* jmp absolute -- eip is correct */
		/* eip is already adjusted, no more changes required */
M
Masami Hiramatsu 已提交
507
		p->ainsn.boostable = 1;
508
		goto no_change;
L
Linus Torvalds 已提交
509 510 511
	case 0xe8:		/* call relative - Fix return addr */
		*tos = orig_eip + (*tos - copy_eip);
		break;
512 513 514
	case 0x9a:		/* call absolute -- same as call absolute, indirect */
		*tos = orig_eip + (*tos - copy_eip);
		goto no_change;
L
Linus Torvalds 已提交
515 516
	case 0xff:
		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
M
Masami Hiramatsu 已提交
517
			/*
518
			 * call absolute, indirect
M
Masami Hiramatsu 已提交
519 520 521
			 * Fix return addr; eip is correct.
			 * But this is not boostable
			 */
L
Linus Torvalds 已提交
522
			*tos = orig_eip + (*tos - copy_eip);
523
			goto no_change;
L
Linus Torvalds 已提交
524 525
		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
M
Masami Hiramatsu 已提交
526 527
			/* eip is correct. And this is boostable */
			p->ainsn.boostable = 1;
528
			goto no_change;
L
Linus Torvalds 已提交
529 530 531 532 533
		}
	default:
		break;
	}

M
Masami Hiramatsu 已提交
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
	if (p->ainsn.boostable == 0) {
		if ((regs->eip > copy_eip) &&
		    (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
			/*
			 * These instructions can be executed directly if it
			 * jumps back to correct address.
			 */
			set_jmp_op((void *)regs->eip,
				   (void *)orig_eip + (regs->eip - copy_eip));
			p->ainsn.boostable = 1;
		} else {
			p->ainsn.boostable = -1;
		}
	}

549 550 551 552
	regs->eip = orig_eip + (regs->eip - copy_eip);

no_change:
	return;
L
Linus Torvalds 已提交
553 554 555 556
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
557
 * remain disabled thoroughout this function.
L
Linus Torvalds 已提交
558
 */
559
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
560
{
561 562 563 564
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
L
Linus Torvalds 已提交
565 566
		return 0;

567 568 569
	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
570
	}
L
Linus Torvalds 已提交
571

572 573
	resume_execution(cur, regs, kcb);
	regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
574

575
	/*Restore back the original saved kprobes variables and continue. */
576 577
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
578 579
		goto out;
	}
580
	reset_current_kprobe();
581
out:
L
Linus Torvalds 已提交
582 583 584 585 586 587 588 589 590 591 592 593 594
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, eflags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (regs->eflags & TF_MASK)
		return 0;

	return 1;
}

595
/*
 * Handle a fault (trap number @trapnr) taken while a kprobe is active on
 * this CPU.
 *
 * Returns 1 if the fault was fixed up here (by the probe's fault_handler
 * or by fixup_exception()), and 0 if the normal fault handling should go
 * ahead.
 */
static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	switch (kcb->kprobe_status) {
	case KPROBE_HIT_SS:
	case KPROBE_REENTER:
		/*
		 * We are here because the instruction being single
		 * stepped caused a page fault. We reset the current
		 * kprobe and the eip points back to the probe address
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->eip = (unsigned long)cur->addr;
		regs->eflags |= kcb->kprobe_old_eflags;
		if (kcb->kprobe_status != KPROBE_REENTER)
			reset_current_kprobe();
		else
			restore_previous_kprobe(kcb);
		preempt_enable_no_resched();
		break;
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SSDONE:
		/*
		 * We increment the nmissed count for accounting,
		 * we can also use npre/npostfault count for accounting
		 * these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page_fault, this could happen
		 * if handler tries to access user space by
		 * copy_from_user(), get_user() etc. Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		if (fixup_exception(regs))
			return 1;

		/*
		 * fixup_exception() could not handle it,
		 * Let do_page_fault() fix it.
		 */
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Wrapper routine to for handling exceptions.
 */
658 659
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
L
Linus Torvalds 已提交
660 661
{
	struct die_args *args = (struct die_args *)data;
662 663
	int ret = NOTIFY_DONE;

664
	if (args->regs && user_mode_vm(args->regs))
665 666
		return ret;

L
Linus Torvalds 已提交
667 668 669
	switch (val) {
	case DIE_INT3:
		if (kprobe_handler(args->regs))
670
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
671 672 673
		break;
	case DIE_DEBUG:
		if (post_kprobe_handler(args->regs))
674
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
675 676 677
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
678 679
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
L
Linus Torvalds 已提交
680 681
		if (kprobe_running() &&
		    kprobe_fault_handler(args->regs, args->trapnr))
682
			ret = NOTIFY_STOP;
683
		preempt_enable();
L
Linus Torvalds 已提交
684 685 686 687
		break;
	default:
		break;
	}
688
	return ret;
L
Linus Torvalds 已提交
689 690
}

691
/*
 * pre_handler used for jprobes: save the register state and part of the
 * stack, then redirect execution to the jprobe's entry function.
 *
 * Always returns 1, which makes kprobe_handler() skip its single-step
 * setup.
 */
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	unsigned long stack_top;

	kcb->jprobe_saved_regs = *regs;
	kcb->jprobe_saved_esp = &regs->esp;
	stack_top = (unsigned long)(kcb->jprobe_saved_esp);

	/*
	 * TBD: As Linus pointed out, gcc assumes that the callee
	 * owns the argument space and could overwrite it, e.g.
	 * tailcall optimization. So, to be absolutely safe
	 * we also save and restore enough stack bytes to cover
	 * the argument area.
	 */
	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)stack_top,
			MIN_STACK_SIZE(stack_top));

	regs->eflags &= ~IF_MASK;
	regs->eip = (unsigned long)(jp->entry);

	return 1;
}

715
/*
 * Return from a jprobe entry function: load the stack pointer saved by
 * setjmp_pre_handler() (passed in %ebx and swapped into %esp) and raise
 * int3. longjmp_break_handler() recognises the trap by its address lying
 * between jprobe_return and the jprobe_return_end label defined here.
 */
void __kprobes jprobe_return(void)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	asm volatile ("       xchgl   %%ebx,%%esp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"
		      : /* no outputs */
		      : "b" (kcb->jprobe_saved_esp)
		      : "memory");
}

727
/*
 * break_handler used for jprobes.
 *
 * If the int3 was raised from inside jprobe_return(), restore the registers
 * and stack bytes saved by setjmp_pre_handler(), re-enable preemption and
 * return 1. Any other breakpoint yields 0. A mismatch between the current
 * and the saved stack pointer location is fatal: both register sets are
 * dumped and BUG() is raised.
 */
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
	u8 *trap_addr = (u8 *) (regs->eip - 1);

	/* Only an int3 strictly inside jprobe_return() is ours. */
	if ((trap_addr <= (u8 *) jprobe_return) ||
	    (trap_addr >= (u8 *) jprobe_return_end))
		return 0;

	if (&regs->esp != kcb->jprobe_saved_esp) {
		struct pt_regs *saved_regs =
		    container_of(kcb->jprobe_saved_esp,
				    struct pt_regs, esp);

		printk("current esp %p does not match saved esp %p\n",
		       &regs->esp, kcb->jprobe_saved_esp);
		printk("Saved registers for jprobe %p\n", jp);
		show_registers(saved_regs);
		printk("Current registers\n");
		show_registers(regs);
		BUG();
	}

	*regs = kcb->jprobe_saved_regs;
	memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
	       MIN_STACK_SIZE(stack_addr));
	preempt_enable_no_resched();

	return 1;
}
755

756
/*
 * Architecture-specific kprobes initialisation. There is nothing to set
 * up here, so this always reports success (0).
 */
int __init arch_init_kprobes(void)
{
	return 0;
}