kprobes.c 21.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 *  Kernel Probes (KProbes)
 *  arch/i386/kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation ( includes contributions from
 *		Rusty Russell).
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
26 27 28
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
L
Linus Torvalds 已提交
29 30 31 32 33
 */

#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
34
#include <linux/kdebug.h>
35
#include <asm/cacheflush.h>
L
Linus Torvalds 已提交
36
#include <asm/desc.h>
37
#include <asm/uaccess.h>
L
Linus Torvalds 已提交
38 39 40

/*
 * jprobe_return_end is a global label emitted by the inline asm in
 * jprobe_return(); it is declared as a function here so that its address
 * can be compared against in longjmp_break_handler().
 */
void jprobe_return_end(void);

41 42 43
/* Per-CPU pointer to the kprobe being handled on this CPU; starts out NULL. */
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
/* Per-CPU kprobe control block (status, saved eflags, jprobe state, ...). */
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

M
Masami Hiramatsu 已提交
44
/* insert a jmp code */
45
static __always_inline void set_jmp_op(void *from, void *to)
M
Masami Hiramatsu 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58
{
	struct __arch_jmp_op {
		char op;
		long raddr;
	} __attribute__((packed)) *jop;
	jop = (struct __arch_jmp_op *)from;
	jop->raddr = (long)(to) - ((long)(from) + 5);
	jop->op = RELATIVEJUMP_INSTRUCTION;
}

/*
 * returns non-zero if opcodes can be boosted.
 */
59
static __always_inline int can_boost(kprobe_opcode_t *opcodes)
{
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf)		      \
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
	 << (row % 32))
	/*
	 * Bitmap consulted with the byte that follows a 0x0f escape; a set
	 * bit means that two-byte opcode may be boosted.
	 * Undefined/reserved opcodes, conditional jumps, Opcode Extension
	 * Groups, and some special opcodes cannot be boosted.
	 */
	static const unsigned long twobyte_is_boostable[256 / 32] = {
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
		/*      -------------------------------         */
		W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
		W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
		W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
		W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
		W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
		W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
		W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
		W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
		W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
		W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
		W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
		W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
		W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
		W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
		W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
		W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0)  /* f0 */
		/*      -------------------------------         */
		/*      0 1 2 3 4 5 6 7 8 9 a b c d e f         */
	};
#undef W
	kprobe_opcode_t *cursor = opcodes;

	/*
	 * Each pass consumes one byte.  A prefix byte loops round to look
	 * at the next byte; anything else yields the verdict immediately.
	 * Running past MAX_INSN_SIZE bytes means "not boostable".
	 */
	while (cursor - opcodes <= MAX_INSN_SIZE - 1) {
		kprobe_opcode_t byte = *(cursor++);

		/* 2nd-byte opcode */
		if (byte == 0x0f) {
			if (cursor - opcodes > MAX_INSN_SIZE - 1)
				return 0;
			return test_bit(*cursor, twobyte_is_boostable);
		}

		switch (byte & 0xf0) {
		case 0x60:
			if (byte >= 0x64 && byte <= 0x66)
				continue; /* prefixes */
			/* can't boost Address-size override and bound */
			return !(byte == 0x62 || byte == 0x67);
		case 0x70:
			return 0; /* can't boost conditional jump */
		case 0xc0:
			/*
			 * 0xc2..0xcb and 0xcf are boostable; the remaining
			 * 0xcX opcodes (software interrupts among them)
			 * are not.
			 */
			return (byte >= 0xc2 && byte <= 0xcb) || byte == 0xcf;
		case 0xd0:
			/* can boost AA* and XLAT */
			return (byte == 0xd4 || byte == 0xd5 || byte == 0xd7);
		case 0xe0:
			/* can boost in/out and absolute jmps */
			return ((byte & 0x04) || byte == 0xea);
		case 0xf0:
			if (!(byte & 0x0c) && byte != 0xf1)
				continue; /* lock/rep(ne) prefix */
			/* clear and set flags can be boosted */
			return (byte == 0xf5 || (byte >= 0xf8 && byte <= 0xfd));
		default:
			if (byte == 0x26 || byte == 0x36 || byte == 0x3e)
				continue; /* prefixes */
			/* can't boost CS override and call */
			return !(byte == 0x2e || byte == 0x9a);
		}
	}

	return 0;
}

L
Linus Torvalds 已提交
138 139 140
/*
 * returns non-zero if opcode modifies the interrupt flag.
 */
141
static int __kprobes is_IF_modifier(kprobe_opcode_t opcode)
{
	/* Yields 1 for exactly these four opcodes, 0 for everything else. */
	return opcode == 0xfa ||	/* cli */
	       opcode == 0xfb ||	/* sti */
	       opcode == 0xcf ||	/* iret/iretd */
	       opcode == 0x9d;		/* popf/popfd */
}

153
/*
 * Allocate an instruction slot for @p, copy the probed instruction into it
 * and record the original first opcode byte.
 *
 * Returns 0 on success or -ENOMEM when no slot could be obtained.
 */
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	/* insn: must be on special executable page on i386. */
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;

	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
	p->opcode = *p->addr;

	/*
	 * 0: candidate for boosting, the final decision is taken in
	 *    resume_execution(); -1: never boostable.
	 */
	p->ainsn.boostable = can_boost(p->addr) ? 0 : -1;

	return 0;
}

170
/* Write the breakpoint opcode at the probe address and flush that range. */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	unsigned long start;

	*p->addr = BREAKPOINT_INSTRUCTION;

	start = (unsigned long) p->addr;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

177
/* Put the saved original opcode back at the probe address and flush it. */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	unsigned long start;

	*p->addr = p->opcode;

	start = (unsigned long) p->addr;
	flush_icache_range(start, start + sizeof(kprobe_opcode_t));
}

184 185
/*
 * Release the instruction slot of @p under kprobe_mutex.  The second
 * argument to free_insn_slot() tells it whether the probe ended up boosted.
 */
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
	int was_boosted;

	mutex_lock(&kprobe_mutex);
	was_boosted = (p->ainsn.boostable == 1);
	free_insn_slot(p->ainsn.insn, was_boosted);
	mutex_unlock(&kprobe_mutex);
}

191
/*
 * Stash the running kprobe and the current status/eflags fields of @kcb
 * into kcb->prev_kprobe so that restore_previous_kprobe() can bring them
 * back.
 */
static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->prev_kprobe.kp           = kprobe_running();
	kcb->prev_kprobe.status       = kcb->kprobe_status;
	kcb->prev_kprobe.old_eflags   = kcb->kprobe_old_eflags;
	kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
}

199
/*
 * Inverse of save_previous_kprobe(): make the stashed kprobe current again
 * and copy the stashed status/eflags fields back into @kcb.
 */
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
	kcb->kprobe_status            = kcb->prev_kprobe.status;
	kcb->kprobe_old_eflags        = kcb->prev_kprobe.old_eflags;
	kcb->kprobe_saved_eflags      = kcb->prev_kprobe.saved_eflags;
}

207
/*
 * Make @p the current kprobe on this CPU and record the TF/IF bits of the
 * interrupted context in @kcb.
 */
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
				struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = p;

	/* Both copies start out as the TF/IF bits of the trapped eflags. */
	kcb->kprobe_old_eflags = (regs->eflags & (TF_MASK | IF_MASK));
	kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags;

	/* For an instruction that changes IF itself, don't carry IF over. */
	if (is_IF_modifier(p->opcode))
		kcb->kprobe_saved_eflags &= ~IF_MASK;
}

217
/*
 * Arrange for one instruction to be single-stepped on return: set TF,
 * clear IF and point eip at the instruction to execute.
 */
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
	regs->eflags = (regs->eflags | TF_MASK) & ~IF_MASK;

	/*
	 * If the probed instruction is itself an int3, step it in place;
	 * otherwise step the copy held in the instruction slot.
	 */
	regs->eip = (p->opcode == BREAKPOINT_INSTRUCTION)
			? (unsigned long)p->addr
			: (unsigned long)p->ainsn.insn;
}

228
/* Called with kretprobe_lock held */
229
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
				      struct pt_regs *regs)
{
	/* The word at &regs->esp is read here as the return address. */
	unsigned long *ret_slot = (unsigned long *)&regs->esp;

	/* Remember the real return address in the instance ... */
	ri->ret_addr = (kprobe_opcode_t *) *ret_slot;

	/* ... and replace the return addr with trampoline addr */
	*ret_slot = (unsigned long) &kretprobe_trampoline;
}

L
Linus Torvalds 已提交
240 241 242 243
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate and they
 * remain disabled throughout this function.
 */
244
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *p;
	struct kprobe_ctlblk *kcb;
	kprobe_opcode_t *addr;
	int ret = 0;

	/* Address of the breakpoint byte: one opcode before the trapped eip. */
	addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));

	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();

	/* Check we're not actually recursing */
	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
			if (kcb->kprobe_status == KPROBE_HIT_SS &&
			    *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
				regs->eflags &= ~TF_MASK;
				regs->eflags |= kcb->kprobe_saved_eflags;
				goto no_kprobe;
			}
			/*
			 * We have reentered kprobe_handler() because another
			 * probe was hit while within the handler.  Save the
			 * original kprobe variables and just single-step the
			 * instruction of the new probe without calling any
			 * user handlers.
			 */
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
			kprobes_inc_nmissed_count(p);
			prepare_singlestep(p, regs);
			kcb->kprobe_status = KPROBE_REENTER;
			return 1;
		}

		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed by
			 * another cpu right after we hit, no further
			 * handling of this interrupt is appropriate
			 */
			regs->eip -= sizeof(kprobe_opcode_t);
			ret = 1;
			goto no_kprobe;
		}

		/* Give the current probe's break handler a chance. */
		p = __get_cpu_var(current_kprobe);
		if (p->break_handler && p->break_handler(p, regs))
			goto ss_probe;
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
			 * Back up over the (now missing) int3 and run
			 * the original instruction.
			 */
			regs->eip -= sizeof(kprobe_opcode_t);
			ret = 1;
		}
		/* Not one of ours: let kernel handle it */
		goto no_kprobe;
	}

	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;

	/* handler has already set things up, so skip ss setup */
	if (p->pre_handler && p->pre_handler(p, regs))
		return 1;

ss_probe:
#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
	if (p->ainsn.boostable == 1 && !p->post_handler) {
		/* Boost up -- we can execute copied instructions directly */
		reset_current_kprobe();
		regs->eip = (unsigned long)p->ainsn.insn;
		preempt_enable_no_resched();
		return 1;
	}
#endif
	prepare_singlestep(p, regs);
	kcb->kprobe_status = KPROBE_HIT_SS;
	return 1;

no_kprobe:
	preempt_enable_no_resched();
	return ret;
}

345 346 347 348 349
/*
 * For function-return probes, init_kprobes() establishes a probepoint
 * here. When a retprobed function returns, this probe is hit and
 * trampoline_probe_handler() runs, calling the kretprobe's handler.
 */
350
 void __kprobes kretprobe_trampoline_holder(void)
 {
	asm volatile ( ".global kretprobe_trampoline\n"
			"kretprobe_trampoline: \n"
			"	pushf\n"
			/* leave three words free (cs, eip, orig_eax) */
			"	subl $12, %esp\n"
			"	pushl %fs\n"
			"	pushl %ds\n"
			"	pushl %es\n"
			"	pushl %eax\n"
			"	pushl %ebp\n"
			"	pushl %edi\n"
			"	pushl %esi\n"
			"	pushl %edx\n"
			"	pushl %ecx\n"
			"	pushl %ebx\n"
			/* hand the address of the frame just built to the handler */
			"	movl %esp, %eax\n"
			"	call trampoline_handler\n"
			/* copy the pushed eflags one word down, into the cs slot */
			"	movl 52(%esp), %edx\n"
			"	movl %edx, 48(%esp)\n"
			/* put the handler's return value where eflags was */
			"	movl %eax, 52(%esp)\n"
			"	popl %ebx\n"
			"	popl %ecx\n"
			"	popl %edx\n"
			"	popl %esi\n"
			"	popl %edi\n"
			"	popl %ebp\n"
			"	popl %eax\n"
			/* step over eip, orig_eax, es, ds, fs */
			"	addl $20, %esp\n"
			/* pops the copied eflags, then returns via the stored address */
			"	popf\n"
			"	ret\n");
}
386 387

/*
388
 * Called from kretprobe_trampoline
389
 */
390
fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
{
	struct kretprobe_instance *ri = NULL;
	struct hlist_head *head;
	struct hlist_head empty_rp;
	struct hlist_node *node, *tmp;
	unsigned long flags;
	unsigned long orig_ret_address = 0;
	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;

	INIT_HLIST_HEAD(&empty_rp);
	spin_lock_irqsave(&kretprobe_lock, flags);
	head = kretprobe_inst_table_head(current);

	/* fixup registers */
	regs->xcs = __KERNEL_CS | get_kernel_rpl();
	regs->eip = trampoline_address;
	regs->orig_eax = 0xffffffff;

	/*
	 * It is possible to have multiple instances associated with a given
	 * task, either because multiple functions in the call path have a
	 * return probe installed on them, and/or because more than one
	 * return probe was registered for a target function.
	 *
	 * We can handle this because:
	 *     - instances are always inserted at the head of the list
	 *     - when multiple return probes are registered for the same
	 *       function, the first instance's ret_addr will point to the
	 *       real return address, and all the rest will point to
	 *       kretprobe_trampoline
	 */
	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
		/* another task is sharing our hash bucket */
		if (ri->task != current)
			continue;

		if (ri->rp && ri->rp->handler) {
			/* Run the user handler with this probe marked current. */
			__get_cpu_var(current_kprobe) = &ri->rp->kp;
			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
			ri->rp->handler(ri, regs);
			__get_cpu_var(current_kprobe) = NULL;
		}

		orig_ret_address = (unsigned long)ri->ret_addr;
		recycle_rp_inst(ri, &empty_rp);

		/*
		 * Anything other than the trampoline address is the real
		 * return address.  Any other instances associated with this
		 * task are for other calls deeper on the call stack.
		 */
		if (orig_ret_address != trampoline_address)
			break;
	}

	kretprobe_assert(ri, orig_ret_address, trampoline_address);
	spin_unlock_irqrestore(&kretprobe_lock, flags);

	/* Free the instances collected on empty_rp, now that the lock is dropped. */
	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
		hlist_del(&ri->hlist);
		kfree(ri);
	}

	return (void *)orig_ret_address;
}

L
Linus Torvalds 已提交
453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction whose first byte has been replaced by the "int 3"
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * interrupt.  We have to fix up the stack as follows:
 *
 * 0) Except in the case of absolute or indirect jump or call instructions,
 * the new eip is relative to the copied instruction.  We need to make
 * it relative to the original instruction.
 *
 * 1) If the single-stepped instruction was pushfl, then the TF and IF
 * flags are set in the just-pushed eflags, and may need to be cleared.
 *
 * 2) If the single-stepped instruction was a call, the return address
 * that is atop the stack is the address following the copied instruction.
 * We need to make it the address following the original instruction.
M
Masami Hiramatsu 已提交
474 475
 *
 * This function also checks instruction size for preparing direct execution.
L
Linus Torvalds 已提交
476
 */
477 478
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
	unsigned long *tos = (unsigned long *)&regs->esp;
	unsigned long copy_eip = (unsigned long)p->ainsn.insn;
	unsigned long orig_eip = (unsigned long)p->addr;

	regs->eflags &= ~TF_MASK;

	switch (p->ainsn.insn[0]) {
	case 0x9c:		/* pushfl */
		/* Replace TF/IF in the just-pushed flags with the saved ones. */
		*tos = (*tos & ~(TF_MASK | IF_MASK)) | kcb->kprobe_old_eflags;
		break;
	case 0xc2:		/* iret/ret/lret */
	case 0xc3:
	case 0xca:
	case 0xcb:
	case 0xcf:
	case 0xea:		/* jmp absolute -- eip is correct */
		/* eip is already adjusted, no more changes required */
		p->ainsn.boostable = 1;
		return;
	case 0xe8:		/* call relative - Fix return addr */
		*tos = orig_eip + (*tos - copy_eip);
		break;
	case 0x9a:		/* call absolute -- same as call absolute, indirect */
		*tos = orig_eip + (*tos - copy_eip);
		return;
	case 0xff:
		if ((p->ainsn.insn[1] & 0x30) == 0x10) {
			/*
			 * call absolute, indirect
			 * Fix return addr; eip is correct.
			 * But this is not boostable
			 */
			*tos = orig_eip + (*tos - copy_eip);
			return;
		}
		if (((p->ainsn.insn[1] & 0x31) == 0x20) ||	/* jmp near, absolute indirect */
		    ((p->ainsn.insn[1] & 0x31) == 0x21)) {	/* jmp far, absolute indirect */
			/* eip is correct. And this is boostable */
			p->ainsn.boostable = 1;
			return;
		}
		break;
	default:
		break;
	}

	/* Boostability still undecided: settle it now. */
	if (p->ainsn.boostable == 0) {
		int jmp_fits = (regs->eip > copy_eip) &&
			       (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE;

		if (jmp_fits) {
			/*
			 * These instructions can be executed directly if it
			 * jumps back to correct address.
			 */
			set_jmp_op((void *)regs->eip,
				   (void *)orig_eip + (regs->eip - copy_eip));
			p->ainsn.boostable = 1;
		} else {
			p->ainsn.boostable = -1;
		}
	}

	/* Make eip relative to the original instruction, not the copy. */
	regs->eip = orig_eip + (regs->eip - copy_eip);
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate and they
547
 * remain disabled throughout this function.
L
Linus Torvalds 已提交
548
 */
549
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	/* No probe is being handled on this CPU: not ours. */
	if (!cur)
		return 0;

	/* The user's post handler is skipped for a re-entered probe. */
	if (kcb->kprobe_status != KPROBE_REENTER && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
	}

	resume_execution(cur, regs, kcb);
	regs->eflags |= kcb->kprobe_saved_eflags;

	/* Restore the original saved kprobe variables and continue. */
	if (kcb->kprobe_status == KPROBE_REENTER)
		restore_previous_kprobe(kcb);
	else
		reset_current_kprobe();

	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, eflags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	return (regs->eflags & TF_MASK) ? 0 : 1;
}

585
static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (kcb->kprobe_status == KPROBE_HIT_SS ||
	    kcb->kprobe_status == KPROBE_REENTER) {
		/*
		 * We are here because the instruction being single
		 * stepped caused a page fault. We reset the current
		 * kprobe and the eip points back to the probe address
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->eip = (unsigned long)cur->addr;
		regs->eflags |= kcb->kprobe_old_eflags;
		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
		preempt_enable_no_resched();
		return 0;
	}

	if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
	    kcb->kprobe_status == KPROBE_HIT_SSDONE) {
		/*
		 * We increment the nmissed count for accounting,
		 * we can also use npre/npostfault count for accounting
		 * these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page_fault, this could happen
		 * if handler tries to access user space by
		 * copy_from_user(), get_user() etc. Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		if (fixup_exception(regs))
			return 1;

		/*
		 * fixup_exception() could not handle it,
		 * Let do_page_fault() fix it.
		 */
	}

	return 0;
}

/*
 * Wrapper routine for handling exceptions.
 */
648 649
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
{
	struct die_args *args = (struct die_args *)data;
	int handled = 0;

	/* Events raised from user mode are passed on untouched. */
	if (args->regs && user_mode_vm(args->regs))
		return NOTIFY_DONE;

	switch (val) {
	case DIE_INT3:
		handled = kprobe_handler(args->regs);
		break;
	case DIE_DEBUG:
		handled = post_kprobe_handler(args->regs);
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
		handled = kprobe_running() &&
			  kprobe_fault_handler(args->regs, args->trapnr);
		preempt_enable();
		break;
	default:
		break;
	}

	return handled ? NOTIFY_STOP : NOTIFY_DONE;
}

681
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	unsigned long stack_top;

	/* Snapshot the registers and remember where the stack started. */
	kcb->jprobe_saved_regs = *regs;
	kcb->jprobe_saved_esp = &regs->esp;
	stack_top = (unsigned long)(kcb->jprobe_saved_esp);

	/*
	 * TBD: As Linus pointed out, gcc assumes that the callee
	 * owns the argument space and could overwrite it, e.g.
	 * tailcall optimization. So, to be absolutely safe
	 * we also save and restore enough stack bytes to cover
	 * the argument area.
	 */
	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)stack_top,
			MIN_STACK_SIZE(stack_top));

	/* Resume at the jprobe entry with IF cleared in the saved eflags. */
	regs->eflags &= ~IF_MASK;
	regs->eip = (unsigned long)(jp->entry);
	return 1;
}

705
/*
 * Swap the saved jprobe stack pointer into %esp and raise int3.  The
 * jprobe_return_end label marks the instruction right after the int3.
 */
void __kprobes jprobe_return(void)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	asm volatile ("       xchgl   %%ebx,%%esp     \n"
		      "       int3			\n"
		      "       .globl jprobe_return_end	\n"
		      "       jprobe_return_end:	\n"
		      "       nop			\n"
		      : /* no outputs */
		      : "b" (kcb->jprobe_saved_esp)
		      : "memory");
}

717
/*
 * Break handler paired with setjmp_pre_handler().  When the int3 that
 * trapped lies between jprobe_return and jprobe_return_end, restore the
 * registers and stack bytes saved by the pre-handler and return 1;
 * otherwise return 0.
 */
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	u8 *addr = (u8 *) (regs->eip - 1);
	unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);

	/* Breakpoint did not come from inside jprobe_return(). */
	if (addr <= (u8 *) jprobe_return || addr >= (u8 *) jprobe_return_end)
		return 0;

	/* The stack pointer must be back where the pre-handler saw it. */
	if (&regs->esp != kcb->jprobe_saved_esp) {
		struct pt_regs *saved_regs =
		    container_of(kcb->jprobe_saved_esp,
				    struct pt_regs, esp);

		printk("current esp %p does not match saved esp %p\n",
		       &regs->esp, kcb->jprobe_saved_esp);
		printk("Saved registers for jprobe %p\n", jp);
		show_registers(saved_regs);
		printk("Current registers\n");
		show_registers(regs);
		BUG();
	}

	*regs = kcb->jprobe_saved_regs;
	memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
	       MIN_STACK_SIZE(stack_addr));
	preempt_enable_no_resched();
	return 1;
}
745

746 747 748 749 750
/* Always returns 0 on this architecture; @p is not examined. */
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
	return 0;
}

751
/* Nothing to set up here: unconditionally reports success (0). */
int __init arch_init_kprobes(void)
{
	return 0;
}