/*
 *  Kernel Probes (KProbes)
 *  arch/i386/kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */

#include <linux/config.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
35
#include <asm/cacheflush.h>
L
Linus Torvalds 已提交
36 37
#include <asm/kdebug.h>
#include <asm/desc.h>
38
#include <asm/uaccess.h>
L
Linus Torvalds 已提交
39 40 41

void jprobe_return_end(void);

42 43 44
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

M
Masami Hiramatsu 已提交
45
/* insert a jmp code */
46
static __always_inline void set_jmp_op(void *from, void *to)
M
Masami Hiramatsu 已提交
47 48 49 50 51 52 53 54 55 56 57 58 59
{
	struct __arch_jmp_op {
		char op;
		long raddr;
	} __attribute__((packed)) *jop;
	jop = (struct __arch_jmp_op *)from;
	jop->raddr = (long)(to) - ((long)(from) + 5);
	jop->op = RELATIVEJUMP_INSTRUCTION;
}

/*
 * returns non-zero if opcodes can be boosted.
 */
60
static __always_inline int can_boost(kprobe_opcode_t opcode)
M
Masami Hiramatsu 已提交
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
{
	switch (opcode & 0xf0 ) {
	case 0x70:
		return 0; /* can't boost conditional jump */
	case 0x90:
		/* can't boost call and pushf */
		return opcode != 0x9a && opcode != 0x9c;
	case 0xc0:
		/* can't boost undefined opcodes and soft-interruptions */
		return (0xc1 < opcode && opcode < 0xc6) ||
			(0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
	case 0xd0:
		/* can boost AA* and XLAT */
		return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
	case 0xe0:
		/* can boost in/out and (may be) jmps */
		return (0xe3 < opcode && opcode != 0xe8);
	case 0xf0:
		/* clear and set flags can be boost */
		return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
	default:
		/* currently, can't boost 2 bytes opcodes */
		return opcode != 0x0f;
	}
}


L
Linus Torvalds 已提交
88 89 90
/*
 * returns non-zero if opcode modifies the interrupt flag.
 */
91
static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
L
Linus Torvalds 已提交
92 93 94 95 96 97 98 99 100 101 102
{
	switch (opcode) {
	case 0xfa:		/* cli */
	case 0xfb:		/* sti */
	case 0xcf:		/* iret/iretd */
	case 0x9d:		/* popf/popfd */
		return 1;
	}
	return 0;
}

103
/*
 * Set up the arch-specific state of probe 'p': obtain an instruction
 * slot, copy the probed instruction into it, remember the original
 * first opcode byte and record whether boosting may be attempted.
 *
 * Returns 0 on success, -ENOMEM if no instruction slot could be obtained.
 */
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	/* insn: must be on special executable page on i386. */
	p->ainsn.insn = get_insn_slot();
	if (p->ainsn.insn == NULL)
		return -ENOMEM;

	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	p->opcode = *p->addr;

	/*
	 * 0 leaves the decision to resume_execution() after the first
	 * single-step; -1 rules boosting out for good.
	 */
	p->ainsn.boostable = can_boost(p->opcode) ? 0 : -1;

	return 0;
}

120
/*
 * Arm the probe: overwrite the first opcode byte at p->addr with the
 * breakpoint instruction and flush the icache for that byte.
 */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	unsigned long start;

	*p->addr = BREAKPOINT_INSTRUCTION;
	start = (unsigned long) p->addr;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

127
/*
 * Disarm the probe: put the saved original opcode byte back at p->addr
 * and flush the icache for that byte.
 */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	unsigned long start;

	*p->addr = p->opcode;
	start = (unsigned long) p->addr;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

134 135
/*
 * Release the instruction slot that arch_prepare_kprobe() obtained for
 * this probe.  The slot is freed with kprobe_mutex held.
 */
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
	mutex_lock(&kprobe_mutex);
	free_insn_slot(p->ainsn.insn);
	mutex_unlock(&kprobe_mutex);
}

141
/*
 * Stash the state of the probe currently being processed (probe pointer,
 * status and both eflags snapshots) in kcb->prev_kprobe so that a
 * nested probe hit can be handled and the state restored afterwards.
 */
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
	kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.kp = kprobe_running();
}

149
/*
 * Counterpart of save_previous_kprobe(): make the stashed probe the
 * current one again and restore its status and eflags snapshots.
 */
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
	kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
}

157
/*
 * Make 'p' the probe being processed on this CPU and snapshot the TF and
 * IF bits of the interrupted context.  kprobe_old_eflags keeps both bits;
 * kprobe_saved_eflags drops IF when the probed instruction itself
 * modifies the interrupt flag.
 */
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
				struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = p;

	kcb->kprobe_old_eflags = (regs->eflags & (TF_MASK | IF_MASK));
	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags;

	if (is_IF_modifier(p->opcode))
		kcb->kprobe_saved_eflags &= ~IF_MASK;
}

167
/*
 * Arrange for one instruction to be single-stepped on return from the
 * trap: set TF, clear IF and point eip at the instruction to execute.
 */
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
	regs->eflags = (regs->eflags | TF_MASK) & ~IF_MASK;

	/* single step inline if the instruction is an int3 */
	regs->eip = (p->opcode == BREAKPOINT_INSTRUCTION)
			? (unsigned long)p->addr
			: (unsigned long)p->ainsn.insn;
}

178
/* Called with kretprobe_lock held */
179 180
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
				      struct pt_regs *regs)
181 182
{
	unsigned long *sara = (unsigned long *)&regs->esp;
183 184 185 186 187 188
        struct kretprobe_instance *ri;

        if ((ri = get_free_rp_inst(rp)) != NULL) {
                ri->rp = rp;
                ri->task = current;
		ri->ret_addr = (kprobe_opcode_t *) *sara;
189 190 191 192

		/* Replace the return addr with trampoline addr */
		*sara = (unsigned long) &kretprobe_trampoline;

193 194 195 196
                add_rp_inst(ri);
        } else {
                rp->nmissed++;
        }
197 198
}

L
Linus Torvalds 已提交
199 200 201 202
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled thorough out this function.
 */
203
static int __kprobes kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
204 205 206
{
	struct kprobe *p;
	int ret = 0;
207
	kprobe_opcode_t *addr;
208
	struct kprobe_ctlblk *kcb;
M
Masami Hiramatsu 已提交
209 210 211
#ifdef CONFIG_PREEMPT
	unsigned pre_preempt_count = preempt_count();
#endif /* CONFIG_PREEMPT */
212

213 214
	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));

215 216 217 218 219 220
	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();
L
Linus Torvalds 已提交
221 222 223 224 225

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
226
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
227
				*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
L
Linus Torvalds 已提交
228
				regs->eflags &= ~TF_MASK;
229
				regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
230 231
				goto no_kprobe;
			}
232 233 234 235 236 237
			/* We have reentered the kprobe_handler(), since
			 * another probe was hit while within the handler.
			 * We here save the original kprobes variables and
			 * just single step on the instruction of the new probe
			 * without calling any user handlers.
			 */
238 239
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
240
			kprobes_inc_nmissed_count(p);
241
			prepare_singlestep(p, regs);
242
			kcb->kprobe_status = KPROBE_REENTER;
243
			return 1;
L
Linus Torvalds 已提交
244
		} else {
245 246 247 248 249 250 251 252 253
			if (*addr != BREAKPOINT_INSTRUCTION) {
			/* The breakpoint instruction was removed by
			 * another cpu right after we hit, no further
			 * handling of this interrupt is appropriate
			 */
				regs->eip -= sizeof(kprobe_opcode_t);
				ret = 1;
				goto no_kprobe;
			}
254
			p = __get_cpu_var(current_kprobe);
L
Linus Torvalds 已提交
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
			if (p->break_handler && p->break_handler(p, regs)) {
				goto ss_probe;
			}
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
271 272
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
L
Linus Torvalds 已提交
273
			 */
274
			regs->eip -= sizeof(kprobe_opcode_t);
L
Linus Torvalds 已提交
275 276 277 278 279 280
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

281 282
	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
L
Linus Torvalds 已提交
283 284 285 286 287

	if (p->pre_handler && p->pre_handler(p, regs))
		/* handler has already set things up, so skip ss setup */
		return 1;

M
Masami Hiramatsu 已提交
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
	if (p->ainsn.boostable == 1 &&
#ifdef CONFIG_PREEMPT
	    !(pre_preempt_count) && /*
				       * This enables booster when the direct
				       * execution path aren't preempted.
				       */
#endif /* CONFIG_PREEMPT */
	    !p->post_handler && !p->break_handler ) {
		/* Boost up -- we can execute copied instructions directly */
		reset_current_kprobe();
		regs->eip = (unsigned long)p->ainsn.insn;
		preempt_enable_no_resched();
		return 1;
	}

L
Linus Torvalds 已提交
303 304
ss_probe:
	prepare_singlestep(p, regs);
305
	kcb->kprobe_status = KPROBE_HIT_SS;
L
Linus Torvalds 已提交
306 307 308
	return 1;

no_kprobe:
309
	preempt_enable_no_resched();
L
Linus Torvalds 已提交
310 311 312
	return ret;
}

313 314 315 316 317
/*
 * For function-return probes, init_kprobes() establishes a probepoint
 * here. When a retprobed function returns, this probe is hit and
 * trampoline_probe_handler() runs, calling the kretprobe's handler.
 */
318
 void __kprobes kretprobe_trampoline_holder(void)
319
 {
320
	asm volatile ( ".global kretprobe_trampoline\n"
321
 			"kretprobe_trampoline: \n"
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
			"	pushf\n"
			/* skip cs, eip, orig_eax, es, ds */
			"	subl $20, %esp\n"
			"	pushl %eax\n"
			"	pushl %ebp\n"
			"	pushl %edi\n"
			"	pushl %esi\n"
			"	pushl %edx\n"
			"	pushl %ecx\n"
			"	pushl %ebx\n"
			"	movl %esp, %eax\n"
			"	call trampoline_handler\n"
			/* move eflags to cs */
			"	movl 48(%esp), %edx\n"
			"	movl %edx, 44(%esp)\n"
			/* save true return address on eflags */
			"	movl %eax, 48(%esp)\n"
			"	popl %ebx\n"
			"	popl %ecx\n"
			"	popl %edx\n"
			"	popl %esi\n"
			"	popl %edi\n"
			"	popl %ebp\n"
			"	popl %eax\n"
			/* skip eip, orig_eax, es, ds */
			"	addl $16, %esp\n"
			"	popf\n"
			"	ret\n");
}
351 352

/*
353
 * Called from kretprobe_trampoline
354
 */
355
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
356
{
357 358 359
        struct kretprobe_instance *ri = NULL;
        struct hlist_head *head;
        struct hlist_node *node, *tmp;
360
	unsigned long flags, orig_ret_address = 0;
361
	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
362

363
	spin_lock_irqsave(&kretprobe_lock, flags);
364
        head = kretprobe_inst_table_head(current);
365

366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
	/*
	 * It is possible to have multiple instances associated with a given
	 * task either because an multiple functions in the call path
	 * have a return probe installed on them, and/or more then one return
	 * return probe was registered for a target function.
	 *
	 * We can handle this because:
	 *     - instances are always inserted at the head of the list
	 *     - when multiple return probes are registered for the same
         *       function, the first instance's ret_addr will point to the
	 *       real return address, and all the rest will point to
	 *       kretprobe_trampoline
	 */
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
                if (ri->task != current)
			/* another task is sharing our hash bucket */
                        continue;

384 385
		if (ri->rp && ri->rp->handler){
			__get_cpu_var(current_kprobe) = &ri->rp->kp;
386
			ri->rp->handler(ri, regs);
387 388
			__get_cpu_var(current_kprobe) = NULL;
		}
389 390

		orig_ret_address = (unsigned long)ri->ret_addr;
391
		recycle_rp_inst(ri);
392 393 394 395 396 397 398 399

		if (orig_ret_address != trampoline_address)
			/*
			 * This is the real return address. Any other
			 * instances associated with this task are for
			 * other calls deeper on the call stack
			 */
			break;
400
	}
401 402 403

	BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));

404
	spin_unlock_irqrestore(&kretprobe_lock, flags);
405

406
	return (void*)orig_ret_address;
407 408
}

L
Linus Torvalds 已提交
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429
/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new eip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed eflags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
M
Masami Hiramatsu 已提交
430 431
 *
 * This function also checks instruction size for preparing direct execution.
L
Linus Torvalds 已提交
432
 */
433 434
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
L
Linus Torvalds 已提交
435 436
{
	unsigned long *tos = (unsigned long *)&regs->esp;
437
	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
L
Linus Torvalds 已提交
438 439
	unsigned long orig_eip = (unsigned long)p->addr;

440
	regs->eflags &= ~TF_MASK;
L
Linus Torvalds 已提交
441 442 443
	switch (p->ainsn.insn[0]) {
	case 0x9c:		/* pushfl */
		*tos &= ~(TF_MASK | IF_MASK);
444
		*tos |= kcb->kprobe_old_eflags;
L
Linus Torvalds 已提交
445
		break;
446 447 448 449
	case 0xc3:		/* ret/lret */
	case 0xcb:
	case 0xc2:
	case 0xca:
450 451
	case 0xea:		/* jmp absolute -- eip is correct */
		/* eip is already adjusted, no more changes required */
M
Masami Hiramatsu 已提交
452
		p->ainsn.boostable = 1;
453
		goto no_change;
L
Linus Torvalds 已提交
454 455 456 457 458 459
	case 0xe8:		/* call relative - Fix return addr */
		*tos = orig_eip + (*tos - copy_eip);
		break;
	case 0xff:
		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
			/* call absolute, indirect */
M
Masami Hiramatsu 已提交
460 461 462 463
			/*
			 * Fix return addr; eip is correct.
			 * But this is not boostable
			 */
L
Linus Torvalds 已提交
464
			*tos = orig_eip + (*tos - copy_eip);
465
			goto no_change;
L
Linus Torvalds 已提交
466 467
		} else if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
			   ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
M
Masami Hiramatsu 已提交
468 469
			/* eip is correct. And this is boostable */
			p->ainsn.boostable = 1;
470
			goto no_change;
L
Linus Torvalds 已提交
471 472 473 474 475
		}
	default:
		break;
	}

M
Masami Hiramatsu 已提交
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
	if (p->ainsn.boostable == 0) {
		if ((regs->eip > copy_eip) &&
		    (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) {
			/*
			 * These instructions can be executed directly if it
			 * jumps back to correct address.
			 */
			set_jmp_op((void *)regs->eip,
				   (void *)orig_eip + (regs->eip - copy_eip));
			p->ainsn.boostable = 1;
		} else {
			p->ainsn.boostable = -1;
		}
	}

491 492 493 494
	regs->eip = orig_eip + (regs->eip - copy_eip);

no_change:
	return;
L
Linus Torvalds 已提交
495 496 497 498
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
499
 * remain disabled thoroughout this function.
L
Linus Torvalds 已提交
500
 */
501
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
L
Linus Torvalds 已提交
502
{
503 504 505 506
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
L
Linus Torvalds 已提交
507 508
		return 0;

509 510 511
	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
512
	}
L
Linus Torvalds 已提交
513

514 515
	resume_execution(cur, regs, kcb);
	regs->eflags |= kcb->kprobe_saved_eflags;
L
Linus Torvalds 已提交
516

517
	/*Restore back the original saved kprobes variables and continue. */
518 519
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
520 521
		goto out;
	}
522
	reset_current_kprobe();
523
out:
L
Linus Torvalds 已提交
524 525 526 527 528 529 530 531 532 533 534 535 536
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, eflags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (regs->eflags & TF_MASK)
		return 0;

	return 1;
}

537
/*
 * Handle a fault that occurred while a kprobe was being processed.
 * What is done depends on the processing stage in kcb->kprobe_status.
 *
 * Returns 1 if the fault was dealt with by the probe's fault_handler or
 * by fixup_exception(), 0 if normal fault handling should continue.
 * The caller only invokes this when kprobe_running() is non-NULL.
 */
static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	switch(kcb->kprobe_status) {
	case KPROBE_HIT_SS:
	case KPROBE_REENTER:
		/*
		 * We are here because the instruction being single
		 * stepped caused a page fault. We reset the current
		 * kprobe and the eip points back to the probe address
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->eip = (unsigned long)cur->addr;
		regs->eflags |= kcb->kprobe_old_eflags;
		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
		preempt_enable_no_resched();
		break;
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SSDONE:
		/*
		 * We increment the nmissed count for accounting,
		 * we can also use npre/npostfault count for accounting
		 * these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page_fault, this could happen
		 * if handler tries to access user space by
		 * copy_from_user(), get_user() etc. Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		if (fixup_exception(regs))
			return 1;

		/*
		 * fixup_exception() could not handle it,
		 * Let do_page_fault() fix it.
		 */
		break;
	default:
		break;
	}
	return 0;
}

/*
 * Wrapper routine to for handling exceptions.
 */
600 601
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
L
Linus Torvalds 已提交
602 603
{
	struct die_args *args = (struct die_args *)data;
604 605
	int ret = NOTIFY_DONE;

606 607 608
	if (args->regs && user_mode(args->regs))
		return ret;

L
Linus Torvalds 已提交
609 610 611
	switch (val) {
	case DIE_INT3:
		if (kprobe_handler(args->regs))
612
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
613 614 615
		break;
	case DIE_DEBUG:
		if (post_kprobe_handler(args->regs))
616
			ret = NOTIFY_STOP;
L
Linus Torvalds 已提交
617 618 619
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
620 621
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
L
Linus Torvalds 已提交
622 623
		if (kprobe_running() &&
		    kprobe_fault_handler(args->regs, args->trapnr))
624
			ret = NOTIFY_STOP;
625
		preempt_enable();
L
Linus Torvalds 已提交
626 627 628 629
		break;
	default:
		break;
	}
630
	return ret;
L
Linus Torvalds 已提交
631 632
}

633
/*
 * pre_handler for jprobes.  Saves the register set and a copy of the
 * top of the stack in the per-CPU control block, then redirects eip to
 * the jprobe's entry function with interrupts masked in eflags.
 *
 * Always returns 1, which makes kprobe_handler() skip its own
 * single-step setup.
 */
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	unsigned long addr;

	kcb->jprobe_saved_regs = *regs;
	kcb->jprobe_saved_esp = &regs->esp;
	addr = (unsigned long)(kcb->jprobe_saved_esp);

	/*
	 * TBD: As Linus pointed out, gcc assumes that the callee
	 * owns the argument space and could overwrite it, e.g.
	 * tailcall optimization. So, to be absolutely safe
	 * we also save and restore enough stack bytes to cover
	 * the argument area.
	 */
	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
			MIN_STACK_SIZE(addr));

	regs->eip = (unsigned long)(jp->entry);
	regs->eflags &= ~IF_MASK;

	return 1;
}

657
/*
 * Ends a jprobe entry function.  Exchanges esp with the stack pointer
 * saved by setjmp_pre_handler() (passed in ebx) and executes int3.
 * The jprobe_return_end label marks the end of this sequence;
 * longjmp_break_handler() uses it to recognise this breakpoint.
 */
void __kprobes jprobe_return(void)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	asm volatile ("       xchgl   %%ebx,%%esp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"::"b"
		      (kcb->jprobe_saved_esp):"memory");
}

669
/*
 * break_handler for jprobes.  If the breakpoint that fired lies between
 * jprobe_return and jprobe_return_end, restore the registers and the
 * stack bytes saved by setjmp_pre_handler() and re-enable preemption.
 *
 * Returns 1 when the saved state was restored, 0 when the breakpoint
 * address is outside that range.  Hits BUG() if the stack pointer does
 * not match the saved one.
 */
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	u8 *addr = (u8 *) (regs->eip - 1);
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
	struct jprobe *jp = container_of(p, struct jprobe, kp);

	/* not the int3 inside jprobe_return(): nothing to do here */
	if ((addr <= (u8 *) jprobe_return) || (addr >= (u8 *) jprobe_return_end))
		return 0;

	if (&regs->esp != kcb->jprobe_saved_esp) {
		struct pt_regs *saved_regs =
		    container_of(kcb->jprobe_saved_esp,
				    struct pt_regs, esp);

		printk("current esp %p does not match saved esp %p\n",
		       &regs->esp, kcb->jprobe_saved_esp);
		printk("Saved registers for jprobe %p\n", jp);
		show_registers(saved_regs);
		printk("Current registers\n");
		show_registers(regs);
		BUG();
	}

	*regs = kcb->jprobe_saved_regs;
	memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
	       MIN_STACK_SIZE(stack_addr));
	preempt_enable_no_resched();

	return 1;
}
697

698
/* Arch-specific kprobes initialisation: nothing to do here; returns 0. */
int __init arch_init_kprobes(void)
{
	return 0;
}