traps.c 25.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
I
Ingo Molnar 已提交
12 13 14 15 16 17 18
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
19
#include <linux/kernel.h>
I
Ingo Molnar 已提交
20 21
#include <linux/module.h>
#include <linux/ptrace.h>
L
Linus Torvalds 已提交
22
#include <linux/string.h>
I
Ingo Molnar 已提交
23 24
#include <linux/unwind.h>
#include <linux/delay.h>
L
Linus Torvalds 已提交
25
#include <linux/errno.h>
I
Ingo Molnar 已提交
26 27
#include <linux/kexec.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
28 29
#include <linux/timer.h>
#include <linux/init.h>
J
Jeremy Fitzhardinge 已提交
30
#include <linux/bug.h>
I
Ingo Molnar 已提交
31 32
#include <linux/nmi.h>
#include <linux/mm.h>
33 34
#include <linux/smp.h>
#include <linux/io.h>
L
Linus Torvalds 已提交
35 36 37 38 39 40 41 42 43 44

#ifdef CONFIG_EISA
#include <linux/ioport.h>
#include <linux/eisa.h>
#endif

#ifdef CONFIG_MCA
#include <linux/mca.h>
#endif

D
Dave Jiang 已提交
45 46 47 48
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

I
Ingo Molnar 已提交
49
#include <asm/stacktrace.h>
L
Linus Torvalds 已提交
50 51
#include <asm/processor.h>
#include <asm/debugreg.h>
I
Ingo Molnar 已提交
52 53 54
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/unwind.h>
55
#include <asm/traps.h>
L
Linus Torvalds 已提交
56 57
#include <asm/desc.h>
#include <asm/i387.h>
58 59 60

#include <mach_traps.h>

61 62 63 64 65
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
#else
66 67
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
L
Linus Torvalds 已提交
68 69
#include <asm/nmi.h>
#include <asm/smp.h>
I
Ingo Molnar 已提交
70
#include <asm/io.h>
71
#include <asm/traps.h>
L
Linus Torvalds 已提交
72

73
#include "cpu/mcheck/mce.h"
L
Linus Torvalds 已提交
74

75 76 77
/*
 * Bitmap with one bit per interrupt vector (NR_VECTORS bits).  On 32-bit
 * builds trap_init() sets the bits for every vector below
 * FIRST_EXTERNAL_VECTOR and for SYSCALL_VECTOR.
 */
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);

L
Linus Torvalds 已提交
78 79 80
/* System call entry point; trap_init() installs it on SYSCALL_VECTOR. */
asmlinkage int system_call(void);

/*
 * Do we ignore FPU interrupts ?
 *
 * Set to 1 by do_coprocessor_error() and do_simd_coprocessor_error()
 * on 32-bit builds.
 */
char ignore_fpu_irq;
L
Linus Torvalds 已提交
82 83 84 85 86 87

/*
 * The IDT has to be page-aligned to simplify the Pentium
 * F0 0F bug workaround.. We have a special link segment
 * for this.
 */
88
gate_desc idt_table[256]
89
	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90
#endif
L
Linus Torvalds 已提交
91

92
/*
 * While non-zero, do_nmi() skips default_do_nmi().  Incremented by
 * stop_nmi() and decremented by restart_nmi().
 */
static int ignore_nmis;
93

94 95 96 97 98 99
/*
 * Enable local interrupts, but only when the saved flags in @regs show
 * that the interrupted context had X86_EFLAGS_IF set.
 */
static inline void conditional_sti(struct pt_regs *regs)
{
	if (!(regs->flags & X86_EFLAGS_IF))
		return;

	local_irq_enable();
}

100 101 102 103 104 105 106 107 108 109 110 111 112 113
/*
 * Raise the preempt count unconditionally, then enable local interrupts
 * if the saved flags in @regs have X86_EFLAGS_IF set.  Paired with
 * preempt_conditional_cli().
 */
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
	inc_preempt_count();

	if (!(regs->flags & X86_EFLAGS_IF))
		return;

	local_irq_enable();
}

/*
 * Counterpart of preempt_conditional_sti(): disable local interrupts if
 * the saved flags in @regs have X86_EFLAGS_IF set, then drop the preempt
 * count unconditionally.
 */
static inline void preempt_conditional_cli(struct pt_regs *regs)
{
	if ((regs->flags & X86_EFLAGS_IF) != 0)
		local_irq_disable();

	dec_preempt_count();
}

114
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
115 116
/*
 * Call die(@str, @regs, @err) unless user_mode_vm(@regs) reports that
 * the trap was taken from user mode.
 */
static inline void
die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
	if (!user_mode_vm(regs))
		die(str, regs, err);
}

122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
/*
 * Lazily install the current thread's I/O bitmap into this CPU's TSS.
 *
 * The copy only happens when the TSS still carries the LAZY invalid
 * bitmap offset and the current thread owns an I/O bitmap.  In that case
 * the bitmap is copied into the TSS, the offset field is made valid and
 * 1 is returned.  Otherwise nothing is touched and 0 is returned.
 */
static int lazy_iobitmap_copy(void)
{
	struct thread_struct *thread;
	struct tss_struct *tss;
	int copied = 0;
	int cpu;

	cpu = get_cpu();
	tss = &per_cpu(init_tss, cpu);
	thread = &current->thread;

	if (tss->x86_tss.io_bitmap_base != INVALID_IO_BITMAP_OFFSET_LAZY ||
	    !thread->io_bitmap_ptr)
		goto out;

	memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
	       thread->io_bitmap_max);

	/*
	 * A previously installed map may have reached higher ports than
	 * the current one: fill the leftover tail with 0xff (no access).
	 */
	if (tss->io_bitmap_max > thread->io_bitmap_max) {
		memset((char *) tss->io_bitmap + thread->io_bitmap_max,
		       0xff,
		       tss->io_bitmap_max - thread->io_bitmap_max);
	}

	tss->io_bitmap_max = thread->io_bitmap_max;
	tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
	tss->io_bitmap_owner = thread;
	copied = 1;

out:
	put_cpu();

	return copied;
}
162
#endif
163

I
Ingo Molnar 已提交
164
static void __kprobes
165
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
I
Ingo Molnar 已提交
166
	long error_code, siginfo_t *info)
L
Linus Torvalds 已提交
167
{
168 169
	struct task_struct *tsk = current;

170
#ifdef CONFIG_X86_32
171
	if (regs->flags & X86_VM_MASK) {
172 173 174 175 176
		/*
		 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
		 * On nmi (interrupt 2), do_trap should not be called.
		 */
		if (trapnr < 6)
L
Linus Torvalds 已提交
177 178 179
			goto vm86_trap;
		goto trap_signal;
	}
180
#endif
L
Linus Torvalds 已提交
181

182
	if (!user_mode(regs))
L
Linus Torvalds 已提交
183 184
		goto kernel_trap;

185
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
186
trap_signal:
187
#endif
I
Ingo Molnar 已提交
188 189 190 191 192 193 194 195 196 197 198
	/*
	 * We want error_code and trap_no set for userspace faults and
	 * kernelspace faults which result in die(), but not
	 * kernelspace faults which are fixed up.  die() gives the
	 * process no chance to handle the signal and notice the
	 * kernel fault information, so that won't result in polluting
	 * the information about previously queued, but not yet
	 * delivered, faults.  See also do_general_protection below.
	 */
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = trapnr;
199

200 201 202 203 204 205 206 207 208 209 210 211
#ifdef CONFIG_X86_64
	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
	    printk_ratelimit()) {
		printk(KERN_INFO
		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
		       tsk->comm, tsk->pid, str,
		       regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}
#endif

I
Ingo Molnar 已提交
212 213 214 215 216
	if (info)
		force_sig_info(signr, info, tsk);
	else
		force_sig(signr, tsk);
	return;
L
Linus Torvalds 已提交
217

I
Ingo Molnar 已提交
218 219 220 221 222
kernel_trap:
	if (!fixup_exception(regs)) {
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = trapnr;
		die(str, regs, error_code);
L
Linus Torvalds 已提交
223
	}
I
Ingo Molnar 已提交
224
	return;
L
Linus Torvalds 已提交
225

226
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
227 228 229 230 231
vm86_trap:
	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
		goto trap_signal;
	return;
232
#endif
L
Linus Torvalds 已提交
233 234
}

I
Ingo Molnar 已提交
235
/*
 * DO_ERROR() - define the trap handler do_<name>().
 *
 * The generated handler first runs the DIE_TRAP notifier chain and
 * returns if a notifier answers NOTIFY_STOP.  Otherwise it conditionally
 * re-enables interrupts and hands the trap to do_trap() without siginfo.
 */
#define DO_ERROR(trapnr, signr, str, name)				\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
							== NOTIFY_STOP)	\
		return;							\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
}

245
/*
 * DO_ERROR_INFO() - like DO_ERROR(), but the generated do_<name>() also
 * builds a siginfo_t (si_signo = signr, si_errno = 0, si_code = sicode,
 * si_addr = siaddr) and passes it to do_trap().
 *
 * @siaddr is expanded inside the handler body, so it may refer to the
 * handler's 'regs' parameter.
 */
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	siginfo_t info;							\
	info.si_signo = signr;						\
	info.si_errno = 0;						\
	info.si_code = sicode;						\
	info.si_addr = (void __user *)siaddr;				\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
							== NOTIFY_STOP)	\
		return;							\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, &info);		\
}

260 261 262 263
/*
 * Handlers generated from the DO_ERROR()/DO_ERROR_INFO() templates.
 * The first argument is the trap vector, the second the signal forced
 * on a user-mode task.  The generic stack-segment handler is 32-bit
 * only; 64-bit builds define do_stack_segment() by hand.
 */
DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
DO_ERROR(4, SIGSEGV, "overflow", overflow)
DO_ERROR(5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
#ifdef CONFIG_X86_32
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
#endif
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
L
Linus Torvalds 已提交
271

272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
#ifdef CONFIG_X86_64
/*
 * Stack segment fault (vector 12) handler for 64-bit builds.
 * Runs on IST stack, so the preempt-count variants of the conditional
 * interrupt helpers bracket the call to do_trap().
 */
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
	int verdict;

	verdict = notify_die(DIE_TRAP, "stack segment", regs, error_code,
			     12, SIGBUS);
	if (verdict != NOTIFY_STOP) {
		preempt_conditional_sti(regs);
		do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
		preempt_conditional_cli(regs);
	}
}

dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
	static const char str[] = "double fault";
	struct task_struct *tsk = current;

	/* Return not checked because double check cannot be ignored */
	notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 8;

	/* This is always a kernel trap and never fixable (and thus must
	   never return). */
	for (;;)
		die(str, regs, error_code);
}
#endif

302
dotraplinkage void __kprobes
303
do_general_protection(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
304
{
305
	struct task_struct *tsk;
I
Ingo Molnar 已提交
306

307 308
	conditional_sti(regs);

309
#ifdef CONFIG_X86_32
310 311
	if (lazy_iobitmap_copy()) {
		/* restart the faulting instruction */
L
Linus Torvalds 已提交
312 313 314
		return;
	}

315
	if (regs->flags & X86_VM_MASK)
L
Linus Torvalds 已提交
316
		goto gp_in_vm86;
317
#endif
L
Linus Torvalds 已提交
318

319
	tsk = current;
320
	if (!user_mode(regs))
L
Linus Torvalds 已提交
321 322
		goto gp_in_kernel;

323 324
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
I
Ingo Molnar 已提交
325

326 327
	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
			printk_ratelimit()) {
328
		printk(KERN_INFO
329 330 331
			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk),
			regs->ip, regs->sp, error_code);
332 333 334
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}
335

336
	force_sig(SIGSEGV, tsk);
L
Linus Torvalds 已提交
337 338
	return;

339
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
340 341 342 343
gp_in_vm86:
	local_irq_enable();
	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
	return;
344
#endif
L
Linus Torvalds 已提交
345 346

gp_in_kernel:
347 348 349 350 351 352
	if (fixup_exception(regs))
		return;

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
	if (notify_die(DIE_GPF, "general protection fault", regs,
L
Linus Torvalds 已提交
353
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
354 355
		return;
	die("general protection fault", regs, error_code);
L
Linus Torvalds 已提交
356 357
}

358
static notrace __kprobes void
I
Ingo Molnar 已提交
359
mem_parity_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
360
{
I
Ingo Molnar 已提交
361 362 363 364 365 366
	printk(KERN_EMERG
		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
			reason, smp_processor_id());

	printk(KERN_EMERG
		"You have some hardware problem, likely on the PCI bus.\n");
D
Dave Jiang 已提交
367 368

#if defined(CONFIG_EDAC)
I
Ingo Molnar 已提交
369
	if (edac_handler_set()) {
D
Dave Jiang 已提交
370 371 372 373 374
		edac_atomic_assert_error();
		return;
	}
#endif

375
	if (panic_on_unrecovered_nmi)
I
Ingo Molnar 已提交
376
		panic("NMI: Not continuing");
L
Linus Torvalds 已提交
377

378
	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
L
Linus Torvalds 已提交
379 380

	/* Clear and disable the memory parity error line. */
381 382
	reason = (reason & 0xf) | 4;
	outb(reason, 0x61);
L
Linus Torvalds 已提交
383 384
}

385
static notrace __kprobes void
I
Ingo Molnar 已提交
386
io_check_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
387 388 389
{
	unsigned long i;

390
	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
L
Linus Torvalds 已提交
391 392 393 394 395
	show_registers(regs);

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & 0xf) | 8;
	outb(reason, 0x61);
I
Ingo Molnar 已提交
396

L
Linus Torvalds 已提交
397
	i = 2000;
I
Ingo Molnar 已提交
398 399 400
	while (--i)
		udelay(1000);

L
Linus Torvalds 已提交
401 402 403 404
	reason &= ~8;
	outb(reason, 0x61);
}

405
static notrace __kprobes void
I
Ingo Molnar 已提交
406
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
407
{
408 409
	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
			NOTIFY_STOP)
J
Jason Wessel 已提交
410
		return;
L
Linus Torvalds 已提交
411
#ifdef CONFIG_MCA
I
Ingo Molnar 已提交
412 413 414 415 416
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
L
Linus Torvalds 已提交
417 418 419 420
		mca_handle_nmi();
		return;
	}
#endif
I
Ingo Molnar 已提交
421 422 423 424
	printk(KERN_EMERG
		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
			reason, smp_processor_id());

425
	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
426
	if (panic_on_unrecovered_nmi)
I
Ingo Molnar 已提交
427
		panic("NMI: Not continuing");
428

429
	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
L
Linus Torvalds 已提交
430 431
}

432
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
L
Linus Torvalds 已提交
433 434
{
	unsigned char reason = 0;
435 436 437
	int cpu;

	cpu = smp_processor_id();
L
Linus Torvalds 已提交
438

439 440
	/* Only the BSP gets external NMIs from the system. */
	if (!cpu)
L
Linus Torvalds 已提交
441
		reason = get_nmi_reason();
I
Ingo Molnar 已提交
442

L
Linus Torvalds 已提交
443
	if (!(reason & 0xc0)) {
444
		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
445
								== NOTIFY_STOP)
L
Linus Torvalds 已提交
446 447 448 449 450 451
			return;
#ifdef CONFIG_X86_LOCAL_APIC
		/*
		 * Ok, so this is none of the documented NMI sources,
		 * so it must be the NMI watchdog.
		 */
452
		if (nmi_watchdog_tick(regs, reason))
L
Linus Torvalds 已提交
453
			return;
454
		if (!do_nmi_callback(regs, cpu))
455
			unknown_nmi_error(reason, regs);
I
Ingo Molnar 已提交
456 457 458
#else
		unknown_nmi_error(reason, regs);
#endif
459

L
Linus Torvalds 已提交
460 461
		return;
	}
462
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
L
Linus Torvalds 已提交
463
		return;
464 465

	/* AK: following checks seem to be broken on modern chipsets. FIXME */
L
Linus Torvalds 已提交
466 467 468 469
	if (reason & 0x80)
		mem_parity_error(reason, regs);
	if (reason & 0x40)
		io_check_error(reason, regs);
470
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
471 472
	/*
	 * Reassert NMI in case it became active meanwhile
I
Ingo Molnar 已提交
473
	 * as it's edge-triggered:
L
Linus Torvalds 已提交
474 475
	 */
	reassert_nmi();
476
#endif
L
Linus Torvalds 已提交
477 478
}

479 480
/*
 * NMI entry point.  Counts the NMI in the per-CPU irq statistics and
 * runs default_do_nmi() unless NMIs are currently ignored (see
 * stop_nmi()/restart_nmi()).  All work is bracketed by
 * nmi_enter()/nmi_exit().
 */
dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();
}

492 493 494 495 496 497 498 499 500 501 502 503
/*
 * Disable the ACPI NMI source and bump the ignore_nmis count so that
 * do_nmi() stops dispatching NMIs.  Undone by restart_nmi().
 */
void stop_nmi(void)
{
	acpi_nmi_disable();
	ignore_nmis += 1;
}

/*
 * Reverse of stop_nmi(): drop the ignore_nmis count first, then
 * re-enable the ACPI NMI source.
 */
void restart_nmi(void)
{
	ignore_nmis -= 1;
	acpi_nmi_enable();
}

504
/* May run on IST stack. */
505
dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
506
{
507
#ifdef CONFIG_KPROBES
L
Linus Torvalds 已提交
508 509
	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
			== NOTIFY_STOP)
510
		return;
511 512 513 514 515
#else
	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
			== NOTIFY_STOP)
		return;
#endif
I
Ingo Molnar 已提交
516

517
	preempt_conditional_sti(regs);
518
	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
519
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
520 521
}

522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
#ifdef CONFIG_X86_64
/*
 * Help handler running on IST stack to switch back to user stack
 * for scheduling or signal handling. The actual stack switch is done in
 * entry.S
 *
 * Returns the pt_regs the caller should continue with.  When that is a
 * different location from @eregs, the register frame is copied there
 * before returning.
 */
asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
	struct pt_regs *regs = eregs;
	/* Did already sync: the saved sp points at this very frame */
	if (eregs == (struct pt_regs *)eregs->sp)
		;
	/* Exception from user space: use the task's own pt_regs slot */
	else if (user_mode(eregs))
		regs = task_pt_regs(current);
	/*
	 * Exception from kernel and interrupts are enabled. Move to
	 * kernel process stack.  Note that this also lowers the saved
	 * eregs->sp by sizeof(struct pt_regs) before the copy below.
	 */
	else if (eregs->flags & X86_EFLAGS_IF)
		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
	/* Kernel exception with interrupts off falls through: regs == eregs */
	if (eregs != regs)
		*regs = *eregs;
	return regs;
}
#endif

L
Linus Torvalds 已提交
545 546 547 548 549 550 551 552 553 554
/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel. Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses. Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space. Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
I
Ingo Molnar 已提交
555
 *
L
Linus Torvalds 已提交
556 557 558 559 560 561 562 563 564 565
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
566 567
 *
 * May run on IST stack.
L
Linus Torvalds 已提交
568
 */
569
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
570 571
{
	struct task_struct *tsk = current;
572
	unsigned long condition;
573
	int si_code;
L
Linus Torvalds 已提交
574

575
	get_debugreg(condition, 6);
L
Linus Torvalds 已提交
576

577 578 579 580 581 582
	/*
	 * The processor cleared BTF, so don't mark that we need it set.
	 */
	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
	tsk->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
583
	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
584
						SIGTRAP) == NOTIFY_STOP)
L
Linus Torvalds 已提交
585
		return;
586

L
Linus Torvalds 已提交
587
	/* It's safe to allow irq's after DR6 has been saved */
588
	preempt_conditional_sti(regs);
L
Linus Torvalds 已提交
589 590 591

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
592
		if (!tsk->thread.debugreg7)
L
Linus Torvalds 已提交
593 594 595
			goto clear_dr7;
	}

596
#ifdef CONFIG_X86_32
597
	if (regs->flags & X86_VM_MASK)
L
Linus Torvalds 已提交
598
		goto debug_vm86;
599
#endif
L
Linus Torvalds 已提交
600 601

	/* Save debug status register where ptrace can see it */
602
	tsk->thread.debugreg6 = condition;
L
Linus Torvalds 已提交
603 604 605 606 607 608

	/*
	 * Single-stepping through TF: make sure we ignore any events in
	 * kernel space (but re-enable TF when returning to user mode).
	 */
	if (condition & DR_STEP) {
609
		if (!user_mode(regs))
L
Linus Torvalds 已提交
610 611 612
			goto clear_TF_reenable;
	}

613
	si_code = get_si_code(condition);
L
Linus Torvalds 已提交
614
	/* Ok, finally something we can handle */
615
	send_sigtrap(tsk, regs, error_code, si_code);
L
Linus Torvalds 已提交
616

I
Ingo Molnar 已提交
617 618
	/*
	 * Disable additional traps. They'll be re-enabled when
L
Linus Torvalds 已提交
619 620 621
	 * the signal is delivered.
	 */
clear_dr7:
622
	set_debugreg(0, 7);
623
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
624 625
	return;

626
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
627 628
debug_vm86:
	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
629
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
630
	return;
631
#endif
L
Linus Torvalds 已提交
632 633 634

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
635
	regs->flags &= ~X86_EFLAGS_TF;
636
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
637 638 639
	return;
}

640 641 642 643 644 645 646 647 648 649 650 651 652 653
#ifdef CONFIG_X86_64
/*
 * Handle a math exception raised while in kernel mode (64-bit builds).
 *
 * Returns 1 when fixup_exception() found a fixup for the faulting
 * instruction.  Otherwise the DIE_GPF notifiers are called, the trap
 * number is recorded in the current thread, die() is invoked and 0 is
 * returned.
 */
static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
{
	if (!fixup_exception(regs)) {
		notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
		/* Illegal floating point operation in the kernel */
		current->thread.trap_no = trapnr;
		die(str, regs, 0);
		return 0;
	}

	return 1;
}
#endif

L
Linus Torvalds 已提交
654 655 656 657 658
/*
 * Note that we play around with the 'TS' bit in an attempt to get
 * the correct behaviour even in the presence of the asynchronous
 * IRQ13 behaviour
 */
659
void math_error(void __user *ip)
L
Linus Torvalds 已提交
660
{
I
Ingo Molnar 已提交
661
	struct task_struct *task;
L
Linus Torvalds 已提交
662
	siginfo_t info;
663
	unsigned short cwd, swd;
L
Linus Torvalds 已提交
664 665 666 667 668 669 670 671 672 673 674

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 16;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
675
	info.si_addr = ip;
L
Linus Torvalds 已提交
676 677 678 679 680 681
	/*
	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
	 * status.  0x3f is the exception bits in these regs, 0x200 is the
	 * C1 reg you need in case of a stack fault, 0x040 is the stack
	 * fault bit.  We should only be taking one exception at a time,
	 * so if this combination doesn't produce any single exception,
682
	 * then we have a bad program that isn't synchronizing its FPU usage
L
Linus Torvalds 已提交
683 684 685 686 687
	 * and it will suffer the consequences since we won't be able to
	 * fully reproduce the context of the exception
	 */
	cwd = get_fpu_cwd(task);
	swd = get_fpu_swd(task);
688
	switch (swd & ~cwd & 0x3f) {
I
Ingo Molnar 已提交
689
	case 0x000: /* No unmasked exception */
690
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
691
		return;
692
#endif
693
	default: /* Multiple exceptions */
I
Ingo Molnar 已提交
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
		break;
	case 0x001: /* Invalid Op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		info.si_code = FPE_FLTINV;
		break;
	case 0x002: /* Denormalize */
	case 0x010: /* Underflow */
		info.si_code = FPE_FLTUND;
		break;
	case 0x004: /* Zero Divide */
		info.si_code = FPE_FLTDIV;
		break;
	case 0x008: /* Overflow */
		info.si_code = FPE_FLTOVF;
		break;
	case 0x020: /* Precision */
		info.si_code = FPE_FLTRES;
		break;
L
Linus Torvalds 已提交
716 717 718 719
	}
	force_sig_info(SIGFPE, &info, task);
}

720
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
721
{
722
	conditional_sti(regs);
723 724

#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
725
	ignore_fpu_irq = 1;
726 727 728 729 730 731
#else
	if (!user_mode(regs) &&
	    kernel_math_error(regs, "kernel x87 math error", 16))
		return;
#endif

732
	math_error((void __user *)regs->ip);
L
Linus Torvalds 已提交
733 734
}

735
/*
 * Deliver SIGFPE for a SIMD floating point exception at @ip.
 *
 * Saves the FPU state of the current task, records trap 19 in the
 * thread, and picks si_code from the unmasked exception bits of the
 * saved MXCSR value.
 */
static void simd_math_error(void __user *ip)
{
	struct task_struct *task;
	siginfo_t info;
	unsigned short mxcsr;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 19;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = ip;
	/*
	 * The SIMD FPU exceptions are handled a little differently, as there
	 * is only a single status/control register.  Thus, to determine which
	 * unmasked exception was caught we must mask the exception mask bits
	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
	 */
	mxcsr = get_fpu_mxcsr(task);
	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
	case 0x000:
	default:
		/* none or several unmasked exceptions: keep __SI_FAULT */
		break;
	case 0x001: /* Invalid Op */
		info.si_code = FPE_FLTINV;
		break;
	case 0x002: /* Denormalize */
	case 0x010: /* Underflow */
		info.si_code = FPE_FLTUND;
		break;
	case 0x004: /* Zero Divide */
		info.si_code = FPE_FLTDIV;
		break;
	case 0x008: /* Overflow */
		info.si_code = FPE_FLTOVF;
		break;
	case 0x020: /* Precision */
		info.si_code = FPE_FLTRES;
		break;
	}
	force_sig_info(SIGFPE, &info, task);
}

783 784
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
785
{
786 787
	conditional_sti(regs);

788
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
789 790 791
	if (cpu_has_xmm) {
		/* Handle SIMD FPU exceptions on PIII+ processors. */
		ignore_fpu_irq = 1;
792
		simd_math_error((void __user *)regs->ip);
I
Ingo Molnar 已提交
793 794 795 796 797 798
		return;
	}
	/*
	 * Handle strange cache flush from user space exception
	 * in all other cases.  This is undocumented behaviour.
	 */
799
	if (regs->flags & X86_VM_MASK) {
I
Ingo Molnar 已提交
800 801
		handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
		return;
L
Linus Torvalds 已提交
802
	}
I
Ingo Molnar 已提交
803 804 805 806
	current->thread.trap_no = 19;
	current->thread.error_code = error_code;
	die_if_kernel("cache flush denied", regs, error_code);
	force_sig(SIGSEGV, current);
807 808 809 810 811 812
#else
	if (!user_mode(regs) &&
			kernel_math_error(regs, "kernel simd math error", 19))
		return;
	simd_math_error((void __user *)regs->ip);
#endif
L
Linus Torvalds 已提交
813 814
}

815 816
/*
 * Handler for vector 15 (installed by trap_init()).  It only restores
 * the interrupt-enable state; the trap itself is deliberately ignored.
 */
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
	conditional_sti(regs);
#if 0
	/* No need to warn about this any longer. */
	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}

825
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
826
unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
L
Linus Torvalds 已提交
827
{
G
Glauber Costa 已提交
828
	struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
S
Stas Sergeev 已提交
829 830 831 832
	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
	unsigned long new_kesp = kesp - base;
	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
I
Ingo Molnar 已提交
833

S
Stas Sergeev 已提交
834
	/* Set up base for espfix segment */
I
Ingo Molnar 已提交
835 836
	desc &= 0x00f0ff0000000000ULL;
	desc |=	((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
S
Stas Sergeev 已提交
837 838 839 840
		((((__u64)base) << 32) & 0xff00000000000000ULL) |
		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
		(lim_pages & 0xffff);
	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
I
Ingo Molnar 已提交
841

S
Stas Sergeev 已提交
842
	return new_kesp;
L
Linus Torvalds 已提交
843
}
844 845 846 847 848 849 850 851 852
#else
/*
 * Weak, empty default for the thermal interrupt handler; a non-weak
 * definition elsewhere overrides it at link time.
 */
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}

/*
 * Weak, empty default for the MCE threshold interrupt handler; a
 * non-weak definition elsewhere overrides it at link time.
 */
asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
{
}
#endif
L
Linus Torvalds 已提交
853 854

/*
I
Ingo Molnar 已提交
855
 * 'math_state_restore()' saves the current math information in the
L
Linus Torvalds 已提交
856 857 858 859 860 861 862 863
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
864
asmlinkage void math_state_restore(void)
L
Linus Torvalds 已提交
865 866 867 868
{
	struct thread_info *thread = current_thread_info();
	struct task_struct *tsk = thread->task;

869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

I
Ingo Molnar 已提交
884
	clts();				/* Allow maths ops (or we recurse) */
885
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
886
	restore_fpu(tsk);
887 888 889 890 891 892 893 894 895 896
#else
	/*
	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
	if (unlikely(restore_fpu_checking(tsk))) {
		stts();
		force_sig(SIGSEGV, tsk);
		return;
	}
#endif
L
Linus Torvalds 已提交
897
	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
898
	tsk->fpu_counter++;
L
Linus Torvalds 已提交
899
}
900
EXPORT_SYMBOL_GPL(math_state_restore);
L
Linus Torvalds 已提交
901 902 903 904

#ifndef CONFIG_MATH_EMULATION
/*
 * Stub used when CONFIG_MATH_EMULATION is not set: log that no
 * coprocessor or emulation is available, force SIGFPE on the current
 * task and call schedule().
 */
asmlinkage void math_emulate(long arg)
{
	printk(KERN_EMERG
		"math-emulation not enabled and no coprocessor found.\n");
	printk(KERN_EMERG "killing %s.\n", current->comm);
	force_sig(SIGFPE, current);
	schedule();
}
#endif /* CONFIG_MATH_EMULATION */

913 914
/*
 * Device-not-available (vector 7) handler.
 *
 * 32-bit builds call math_emulate() when CR0.EM is set and
 * math_state_restore() otherwise.  Other builds always call
 * math_state_restore().
 */
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error)
{
#ifdef CONFIG_X86_32
	if (read_cr0() & X86_CR0_EM) {
		conditional_sti(regs);
		math_emulate(0);
	} else {
		math_state_restore(); /* interrupts still off */
		conditional_sti(regs);
	}
#else
	math_state_restore();
#endif
}

929
#ifdef CONFIG_X86_32
930
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
931 932 933 934 935 936 937 938 939 940 941
{
	siginfo_t info;
	local_irq_enable();

	info.si_signo = SIGILL;
	info.si_errno = 0;
	info.si_code = ILL_BADSTK;
	info.si_addr = 0;
	if (notify_die(DIE_TRAP, "iret exception",
			regs, error_code, 32, SIGILL) == NOTIFY_STOP)
		return;
942
	do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
943
}
944
#endif
945

L
Linus Torvalds 已提交
946 947
/*
 * Boot-time trap setup.
 *
 * Probes for an EISA signature (CONFIG_EISA), installs the gates for
 * vectors 0-19 and the system call vector(s), enables FXSR/XMM support
 * in CR4 on 32-bit builds, reserves the built-in vectors in
 * used_vectors, and finally calls cpu_init().
 */
void __init trap_init(void)
{
#ifdef CONFIG_X86_32
	int i;
#endif

#ifdef CONFIG_EISA
	void __iomem *p = early_ioremap(0x0FFFD9, 4);

	/* The four bytes at 0x0FFFD9 spell "EISA" on an EISA machine. */
	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
		EISA_bus = 1;
	early_iounmap(p, 4);
#endif

	set_intr_gate(0, &divide_error);
	set_intr_gate_ist(1, &debug, DEBUG_STACK);
	set_intr_gate_ist(2, &nmi, NMI_STACK);
	/* int3 can be called from all */
	set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
	/* int4 can be called from all */
	set_system_intr_gate(4, &overflow);
	set_intr_gate(5, &bounds);
	set_intr_gate(6, &invalid_op);
	set_intr_gate(7, &device_not_available);
#ifdef CONFIG_X86_32
	set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
	set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
#endif
	set_intr_gate(9, &coprocessor_segment_overrun);
	set_intr_gate(10, &invalid_TSS);
	set_intr_gate(11, &segment_not_present);
	set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
	set_intr_gate(13, &general_protection);
	set_intr_gate(14, &page_fault);
	set_intr_gate(15, &spurious_interrupt_bug);
	set_intr_gate(16, &coprocessor_error);
	set_intr_gate(17, &alignment_check);
#ifdef CONFIG_X86_MCE
	set_intr_gate_ist(18, &machine_check, MCE_STACK);
#endif
	set_intr_gate(19, &simd_coprocessor_error);

#ifdef CONFIG_IA32_EMULATION
	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif

#ifdef CONFIG_X86_32
	if (cpu_has_fxsr) {
		printk(KERN_INFO "Enabling fast FPU save and restore... ");
		set_in_cr4(X86_CR4_OSFXSR);
		printk("done.\n");
	}
	if (cpu_has_xmm) {
		printk(KERN_INFO
			"Enabling unmasked SIMD FPU exception support... ");
		set_in_cr4(X86_CR4_OSXMMEXCPT);
		printk("done.\n");
	}

	set_system_trap_gate(SYSCALL_VECTOR, &system_call);

	/* Reserve all the builtin and the syscall vector: */
	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
		set_bit(i, used_vectors);

	set_bit(SYSCALL_VECTOR, used_vectors);
#endif
	/*
	 * Should be a barrier for any external CPU state:
	 */
	cpu_init();

#ifdef CONFIG_X86_32
	trap_init_hook();
#endif
}