/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
19
#include <linux/kernel.h>
I
Ingo Molnar 已提交
20 21
#include <linux/module.h>
#include <linux/ptrace.h>
L
Linus Torvalds 已提交
22
#include <linux/string.h>
I
Ingo Molnar 已提交
23 24
#include <linux/unwind.h>
#include <linux/delay.h>
L
Linus Torvalds 已提交
25
#include <linux/errno.h>
I
Ingo Molnar 已提交
26 27
#include <linux/kexec.h>
#include <linux/sched.h>
L
Linus Torvalds 已提交
28 29
#include <linux/timer.h>
#include <linux/init.h>
J
Jeremy Fitzhardinge 已提交
30
#include <linux/bug.h>
I
Ingo Molnar 已提交
31 32
#include <linux/nmi.h>
#include <linux/mm.h>
33 34
#include <linux/smp.h>
#include <linux/io.h>
L
Linus Torvalds 已提交
35 36 37 38 39 40 41 42 43 44

#ifdef CONFIG_EISA
#include <linux/ioport.h>
#include <linux/eisa.h>
#endif

#ifdef CONFIG_MCA
#include <linux/mca.h>
#endif

D
Dave Jiang 已提交
45 46 47 48
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

I
Ingo Molnar 已提交
49
#include <asm/stacktrace.h>
L
Linus Torvalds 已提交
50 51
#include <asm/processor.h>
#include <asm/debugreg.h>
I
Ingo Molnar 已提交
52 53 54
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/unwind.h>
55
#include <asm/traps.h>
L
Linus Torvalds 已提交
56 57
#include <asm/desc.h>
#include <asm/i387.h>
58 59 60

#include <mach_traps.h>

61 62 63 64 65
#ifdef CONFIG_X86_64
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
#else
66 67
#include <asm/processor-flags.h>
#include <asm/arch_hooks.h>
L
Linus Torvalds 已提交
68 69
#include <asm/nmi.h>
#include <asm/smp.h>
I
Ingo Molnar 已提交
70
#include <asm/io.h>
71
#include <asm/traps.h>
L
Linus Torvalds 已提交
72

73
#include "cpu/mcheck/mce.h"
L
Linus Torvalds 已提交
74

75 76 77
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);

L
Linus Torvalds 已提交
78 79 80
asmlinkage int system_call(void);

/* Do we ignore FPU interrupts ? */
I
Ingo Molnar 已提交
81
char ignore_fpu_irq;
L
Linus Torvalds 已提交
82 83 84 85 86 87

/*
 * The IDT has to be page-aligned to simplify the Pentium
 * F0 0F bug workaround.. We have a special link segment
 * for this.
 */
88
gate_desc idt_table[256]
89
	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
90
#endif
L
Linus Torvalds 已提交
91

92
static int ignore_nmis;
93

94 95 96 97 98 99
/*
 * Turn local interrupts back on, but only if the saved flags in @regs
 * show that the interrupted context had IF set.
 */
static inline void conditional_sti(struct pt_regs *regs)
{
	if (!(regs->flags & X86_EFLAGS_IF))
		return;

	local_irq_enable();
}

100 101 102 103 104 105 106 107 108 109 110 111 112 113
/*
 * Bump the preempt count, then enable local interrupts if the saved
 * flags in @regs have IF set.  Paired with preempt_conditional_cli().
 */
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
	/* The preempt count is raised before interrupts can come back on. */
	inc_preempt_count();

	if (!(regs->flags & X86_EFLAGS_IF))
		return;

	local_irq_enable();
}

/*
 * Undo preempt_conditional_sti(): disable local interrupts again if the
 * saved flags in @regs have IF set, then drop the preempt count.
 */
static inline void preempt_conditional_cli(struct pt_regs *regs)
{
	const unsigned long irqs_were_on = regs->flags & X86_EFLAGS_IF;

	if (irqs_were_on)
		local_irq_disable();

	/* The preempt count is dropped only after interrupts are off. */
	dec_preempt_count();
}

114
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
115 116
/*
 * Call die(@str, @regs, @err) unless user_mode_vm(@regs) reports that
 * the fault was taken in user context; in that case do nothing.
 */
static inline void
die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
	if (!user_mode_vm(regs))
		die(str, regs, err);
}

122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
/*
 * Lazy copy of the current thread's I/O bitmap into this CPU's TSS.
 *
 * The copy is performed only when the TSS still has the LAZY invalid
 * bitmap offset and the faulting thread has an I/O bitmap pointer.
 * In that case the bitmap is copied, the TSS offset is set to
 * IO_BITMAP_OFFSET, the thread is recorded as bitmap owner and 1 is
 * returned.  Otherwise nothing is changed and 0 is returned.
 */
static int lazy_iobitmap_copy(void)
{
	struct thread_struct *t;
	struct tss_struct *cpu_tss;
	int copied = 0;
	int cpu;

	cpu = get_cpu();
	cpu_tss = &per_cpu(init_tss, cpu);
	t = &current->thread;

	if (cpu_tss->x86_tss.io_bitmap_base != INVALID_IO_BITMAP_OFFSET_LAZY)
		goto out;
	if (!t->io_bitmap_ptr)
		goto out;

	memcpy(cpu_tss->io_bitmap, t->io_bitmap_ptr, t->io_bitmap_max);

	/*
	 * If the previously set map was extending to higher ports
	 * than the current one, pad extra space with 0xff (no access).
	 */
	if (t->io_bitmap_max < cpu_tss->io_bitmap_max) {
		char *tail = (char *) cpu_tss->io_bitmap + t->io_bitmap_max;

		memset(tail, 0xff, cpu_tss->io_bitmap_max - t->io_bitmap_max);
	}

	cpu_tss->io_bitmap_max = t->io_bitmap_max;
	cpu_tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
	cpu_tss->io_bitmap_owner = t;
	copied = 1;

out:
	put_cpu();

	return copied;
}
162
#endif
163

I
Ingo Molnar 已提交
164
static void __kprobes
165
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
I
Ingo Molnar 已提交
166
	long error_code, siginfo_t *info)
L
Linus Torvalds 已提交
167
{
168 169
	struct task_struct *tsk = current;

170
#ifdef CONFIG_X86_32
171
	if (regs->flags & X86_VM_MASK) {
172 173 174 175 176
		/*
		 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
		 * On nmi (interrupt 2), do_trap should not be called.
		 */
		if (trapnr < 6)
L
Linus Torvalds 已提交
177 178 179
			goto vm86_trap;
		goto trap_signal;
	}
180
#endif
L
Linus Torvalds 已提交
181

182
	if (!user_mode(regs))
L
Linus Torvalds 已提交
183 184
		goto kernel_trap;

185
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
186
trap_signal:
187
#endif
I
Ingo Molnar 已提交
188 189 190 191 192 193 194 195 196 197 198
	/*
	 * We want error_code and trap_no set for userspace faults and
	 * kernelspace faults which result in die(), but not
	 * kernelspace faults which are fixed up.  die() gives the
	 * process no chance to handle the signal and notice the
	 * kernel fault information, so that won't result in polluting
	 * the information about previously queued, but not yet
	 * delivered, faults.  See also do_general_protection below.
	 */
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = trapnr;
199

200 201 202 203 204 205 206 207 208 209 210 211
#ifdef CONFIG_X86_64
	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
	    printk_ratelimit()) {
		printk(KERN_INFO
		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
		       tsk->comm, tsk->pid, str,
		       regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}
#endif

I
Ingo Molnar 已提交
212 213 214 215 216
	if (info)
		force_sig_info(signr, info, tsk);
	else
		force_sig(signr, tsk);
	return;
L
Linus Torvalds 已提交
217

I
Ingo Molnar 已提交
218 219 220 221 222
kernel_trap:
	if (!fixup_exception(regs)) {
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = trapnr;
		die(str, regs, error_code);
L
Linus Torvalds 已提交
223
	}
I
Ingo Molnar 已提交
224
	return;
L
Linus Torvalds 已提交
225

226
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
227 228 229 230 231
vm86_trap:
	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
		goto trap_signal;
	return;
232
#endif
L
Linus Torvalds 已提交
233 234
}

I
Ingo Molnar 已提交
235
/*
 * Generate do_<name>(): a trap handler that runs the die notifier chain
 * (returning early on NOTIFY_STOP), conditionally re-enables interrupts
 * and then calls do_trap() without a siginfo.
 */
#define DO_ERROR(trapnr, signr, str, name)				\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
							== NOTIFY_STOP)	\
		return;							\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
}

245
/*
 * Like DO_ERROR(), but the generated do_<name>() also fills in a
 * siginfo (si_signo = signr, si_errno = 0, si_code = sicode,
 * si_addr = siaddr) and passes it to do_trap().
 */
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	siginfo_t info;							\
	info.si_signo = signr;						\
	info.si_errno = 0;						\
	info.si_code = sicode;						\
	info.si_addr = (void __user *)siaddr;				\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
							== NOTIFY_STOP)	\
		return;							\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, &info);		\
}

260 261 262 263
/*
 * Simple trap handlers generated from the macros above.  The stack
 * segment handler (trap 12) is generated here only on 32-bit; the
 * 64-bit build has a hand-written do_stack_segment().
 */
DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
DO_ERROR(4, SIGSEGV, "overflow", overflow)
DO_ERROR(5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
#ifdef CONFIG_X86_32
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
#endif
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
L
Linus Torvalds 已提交
271

272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
#ifdef CONFIG_X86_64
/*
 * Stack segment fault (trap 12) handler.  Runs on IST stack.
 *
 * Unless a die notifier returns NOTIFY_STOP, the trap is handed to
 * do_trap() as SIGBUS, bracketed by preempt_conditional_sti()/_cli().
 */
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
{
	int verdict;

	verdict = notify_die(DIE_TRAP, "stack segment", regs, error_code,
			     12, SIGBUS);
	if (verdict != NOTIFY_STOP) {
		preempt_conditional_sti(regs);
		do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
		preempt_conditional_cli(regs);
	}
}

/*
 * Double fault (trap 8) handler.  Records the trap in the current
 * task's thread state and calls die() in an endless loop.
 */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
	static const char str[] = "double fault";
	struct task_struct *task = current;

	/* Result deliberately unused: a double fault cannot be ignored. */
	notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);

	task->thread.error_code = error_code;
	task->thread.trap_no = 8;

	/*
	 * This is always a kernel trap and never fixable, so this
	 * handler must never return.
	 */
	while (1)
		die(str, regs, error_code);
}
#endif

302
dotraplinkage void __kprobes
303
do_general_protection(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
304
{
305
	struct task_struct *tsk;
I
Ingo Molnar 已提交
306

307 308
	conditional_sti(regs);

309
#ifdef CONFIG_X86_32
310 311
	if (lazy_iobitmap_copy()) {
		/* restart the faulting instruction */
L
Linus Torvalds 已提交
312 313 314
		return;
	}

315
	if (regs->flags & X86_VM_MASK)
L
Linus Torvalds 已提交
316
		goto gp_in_vm86;
317
#endif
L
Linus Torvalds 已提交
318

319
	tsk = current;
320
	if (!user_mode(regs))
L
Linus Torvalds 已提交
321 322
		goto gp_in_kernel;

323 324
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
I
Ingo Molnar 已提交
325

326 327
	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
			printk_ratelimit()) {
328
		printk(KERN_INFO
329 330 331
			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk),
			regs->ip, regs->sp, error_code);
332 333 334
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}
335

336
	force_sig(SIGSEGV, tsk);
L
Linus Torvalds 已提交
337 338
	return;

339
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
340 341 342 343
gp_in_vm86:
	local_irq_enable();
	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
	return;
344
#endif
L
Linus Torvalds 已提交
345 346

gp_in_kernel:
347 348 349 350 351 352
	if (fixup_exception(regs))
		return;

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
	if (notify_die(DIE_GPF, "general protection fault", regs,
L
Linus Torvalds 已提交
353
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
354 355
		return;
	die("general protection fault", regs, error_code);
L
Linus Torvalds 已提交
356 357
}

358
static notrace __kprobes void
I
Ingo Molnar 已提交
359
mem_parity_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
360
{
I
Ingo Molnar 已提交
361 362 363 364 365 366
	printk(KERN_EMERG
		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
			reason, smp_processor_id());

	printk(KERN_EMERG
		"You have some hardware problem, likely on the PCI bus.\n");
D
Dave Jiang 已提交
367 368

#if defined(CONFIG_EDAC)
I
Ingo Molnar 已提交
369
	if (edac_handler_set()) {
D
Dave Jiang 已提交
370 371 372 373 374
		edac_atomic_assert_error();
		return;
	}
#endif

375
	if (panic_on_unrecovered_nmi)
I
Ingo Molnar 已提交
376
		panic("NMI: Not continuing");
L
Linus Torvalds 已提交
377

378
	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
L
Linus Torvalds 已提交
379 380

	/* Clear and disable the memory parity error line. */
381 382
	reason = (reason & 0xf) | 4;
	outb(reason, 0x61);
L
Linus Torvalds 已提交
383 384
}

385
static notrace __kprobes void
I
Ingo Molnar 已提交
386
io_check_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
387 388 389
{
	unsigned long i;

390
	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
L
Linus Torvalds 已提交
391 392 393 394 395
	show_registers(regs);

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & 0xf) | 8;
	outb(reason, 0x61);
I
Ingo Molnar 已提交
396

L
Linus Torvalds 已提交
397
	i = 2000;
I
Ingo Molnar 已提交
398 399 400
	while (--i)
		udelay(1000);

L
Linus Torvalds 已提交
401 402 403 404
	reason &= ~8;
	outb(reason, 0x61);
}

405
static notrace __kprobes void
I
Ingo Molnar 已提交
406
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
L
Linus Torvalds 已提交
407
{
408 409
	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
			NOTIFY_STOP)
J
Jason Wessel 已提交
410
		return;
L
Linus Torvalds 已提交
411
#ifdef CONFIG_MCA
I
Ingo Molnar 已提交
412 413 414 415 416
	/*
	 * Might actually be able to figure out what the guilty party
	 * is:
	 */
	if (MCA_bus) {
L
Linus Torvalds 已提交
417 418 419 420
		mca_handle_nmi();
		return;
	}
#endif
I
Ingo Molnar 已提交
421 422 423 424
	printk(KERN_EMERG
		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
			reason, smp_processor_id());

425
	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
426
	if (panic_on_unrecovered_nmi)
I
Ingo Molnar 已提交
427
		panic("NMI: Not continuing");
428

429
	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
L
Linus Torvalds 已提交
430 431
}

432
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
L
Linus Torvalds 已提交
433 434
{
	unsigned char reason = 0;
435 436 437
	int cpu;

	cpu = smp_processor_id();
L
Linus Torvalds 已提交
438

439 440
	/* Only the BSP gets external NMIs from the system. */
	if (!cpu)
L
Linus Torvalds 已提交
441
		reason = get_nmi_reason();
I
Ingo Molnar 已提交
442

L
Linus Torvalds 已提交
443
	if (!(reason & 0xc0)) {
444
		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
445
								== NOTIFY_STOP)
L
Linus Torvalds 已提交
446 447 448 449 450 451
			return;
#ifdef CONFIG_X86_LOCAL_APIC
		/*
		 * Ok, so this is none of the documented NMI sources,
		 * so it must be the NMI watchdog.
		 */
452
		if (nmi_watchdog_tick(regs, reason))
L
Linus Torvalds 已提交
453
			return;
454
		if (!do_nmi_callback(regs, cpu))
455
			unknown_nmi_error(reason, regs);
I
Ingo Molnar 已提交
456 457 458
#else
		unknown_nmi_error(reason, regs);
#endif
459

L
Linus Torvalds 已提交
460 461
		return;
	}
462
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
L
Linus Torvalds 已提交
463
		return;
464 465

	/* AK: following checks seem to be broken on modern chipsets. FIXME */
L
Linus Torvalds 已提交
466 467 468 469
	if (reason & 0x80)
		mem_parity_error(reason, regs);
	if (reason & 0x40)
		io_check_error(reason, regs);
470
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
471 472
	/*
	 * Reassert NMI in case it became active meanwhile
I
Ingo Molnar 已提交
473
	 * as it's edge-triggered:
L
Linus Torvalds 已提交
474 475
	 */
	reassert_nmi();
476
#endif
L
Linus Torvalds 已提交
477 478
}

479 480
/*
 * NMI entry point: bump the per-CPU NMI counter and, unless NMIs are
 * currently being ignored (see stop_nmi()/restart_nmi()), run
 * default_do_nmi().  The whole body is bracketed by
 * nmi_enter()/nmi_exit().
 */
dotraplinkage notrace __kprobes void
do_nmi(struct pt_regs *regs, long error_code)
{
	nmi_enter();

#ifdef CONFIG_X86_32
	{ int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
#else
	add_pda(__nmi_count, 1);
#endif

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();
}

496 497 498 499 500 501 502 503 504 505 506 507
/*
 * Disable the ACPI NMI and raise ignore_nmis, which makes do_nmi()
 * skip default_do_nmi().  Balanced by restart_nmi().
 */
void stop_nmi(void)
{
	acpi_nmi_disable();
	++ignore_nmis;
}

/*
 * Counterpart of stop_nmi(): lower ignore_nmis, then re-enable the
 * ACPI NMI.
 */
void restart_nmi(void)
{
	--ignore_nmis;
	acpi_nmi_enable();
}

508
/* May run on IST stack. */
509
dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
510
{
511
#ifdef CONFIG_KPROBES
L
Linus Torvalds 已提交
512 513
	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
			== NOTIFY_STOP)
514
		return;
515 516 517 518 519
#else
	if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
			== NOTIFY_STOP)
		return;
#endif
I
Ingo Molnar 已提交
520

521
	preempt_conditional_sti(regs);
522
	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
523
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
524 525
}

526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
#ifdef CONFIG_X86_64
/*
 * Help handler running on IST stack to switch back to user stack
 * for scheduling or signal handling. The actual stack switch is done in
 * entry.S
 *
 * Returns the pt_regs location to continue on: @eregs itself when no
 * move is needed, otherwise the new location, with *@eregs copied there.
 */
asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
{
	struct pt_regs *target;

	/* Did already sync */
	if (eregs == (struct pt_regs *)eregs->sp)
		return eregs;

	if (user_mode(eregs)) {
		/* Exception from user space */
		target = task_pt_regs(current);
	} else if (eregs->flags & X86_EFLAGS_IF) {
		/*
		 * Exception from kernel and interrupts are enabled. Move to
		 * kernel process stack.
		 */
		eregs->sp -= sizeof(struct pt_regs);
		target = (struct pt_regs *)eregs->sp;
	} else {
		return eregs;
	}

	if (target != eregs)
		*target = *eregs;

	return target;
}
#endif

L
Linus Torvalds 已提交
549 550 551 552 553 554 555 556 557 558
/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel. Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses. Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space. Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
I
Ingo Molnar 已提交
559
 *
L
Linus Torvalds 已提交
560 561 562 563 564 565 566 567 568 569
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
570 571
 *
 * May run on IST stack.
L
Linus Torvalds 已提交
572
 */
573
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
574 575
{
	struct task_struct *tsk = current;
576
	unsigned long condition;
577
	int si_code;
L
Linus Torvalds 已提交
578

579
	get_debugreg(condition, 6);
L
Linus Torvalds 已提交
580

581 582 583 584 585 586
	/*
	 * The processor cleared BTF, so don't mark that we need it set.
	 */
	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
	tsk->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
587
	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
588
						SIGTRAP) == NOTIFY_STOP)
L
Linus Torvalds 已提交
589
		return;
590

L
Linus Torvalds 已提交
591
	/* It's safe to allow irq's after DR6 has been saved */
592
	preempt_conditional_sti(regs);
L
Linus Torvalds 已提交
593 594 595

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
596
		if (!tsk->thread.debugreg7)
L
Linus Torvalds 已提交
597 598 599
			goto clear_dr7;
	}

600
#ifdef CONFIG_X86_32
601
	if (regs->flags & X86_VM_MASK)
L
Linus Torvalds 已提交
602
		goto debug_vm86;
603
#endif
L
Linus Torvalds 已提交
604 605

	/* Save debug status register where ptrace can see it */
606
	tsk->thread.debugreg6 = condition;
L
Linus Torvalds 已提交
607 608 609 610 611 612

	/*
	 * Single-stepping through TF: make sure we ignore any events in
	 * kernel space (but re-enable TF when returning to user mode).
	 */
	if (condition & DR_STEP) {
613
		if (!user_mode(regs))
L
Linus Torvalds 已提交
614 615 616
			goto clear_TF_reenable;
	}

617
	si_code = get_si_code(condition);
L
Linus Torvalds 已提交
618
	/* Ok, finally something we can handle */
619
	send_sigtrap(tsk, regs, error_code, si_code);
L
Linus Torvalds 已提交
620

I
Ingo Molnar 已提交
621 622
	/*
	 * Disable additional traps. They'll be re-enabled when
L
Linus Torvalds 已提交
623 624 625
	 * the signal is delivered.
	 */
clear_dr7:
626
	set_debugreg(0, 7);
627
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
628 629
	return;

630
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
631 632
debug_vm86:
	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
633
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
634
	return;
635
#endif
L
Linus Torvalds 已提交
636 637 638

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
639
	regs->flags &= ~X86_EFLAGS_TF;
640
	preempt_conditional_cli(regs);
L
Linus Torvalds 已提交
641 642 643
	return;
}

644 645 646 647 648 649 650 651 652 653 654 655 656 657
#ifdef CONFIG_X86_64
/*
 * Handle a math exception raised in kernel mode.
 *
 * Returns 1 when fixup_exception() handled the fault.  Otherwise the
 * die notifiers are informed, @trapnr is recorded in the current
 * thread and die() is called; 0 is returned if die() comes back.
 */
static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
{
	if (!fixup_exception(regs)) {
		notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
		/* Illegal floating point operation in the kernel */
		current->thread.trap_no = trapnr;
		die(str, regs, 0);
		return 0;
	}

	return 1;
}
#endif

L
Linus Torvalds 已提交
658 659 660 661 662
/*
 * Note that we play around with the 'TS' bit in an attempt to get
 * the correct behaviour even in the presence of the asynchronous
 * IRQ13 behaviour
 */
663
void math_error(void __user *ip)
L
Linus Torvalds 已提交
664
{
I
Ingo Molnar 已提交
665
	struct task_struct *task;
L
Linus Torvalds 已提交
666
	siginfo_t info;
667
	unsigned short cwd, swd;
L
Linus Torvalds 已提交
668 669 670 671 672 673 674 675 676 677 678

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 16;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
679
	info.si_addr = ip;
L
Linus Torvalds 已提交
680 681 682 683 684 685
	/*
	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
	 * status.  0x3f is the exception bits in these regs, 0x200 is the
	 * C1 reg you need in case of a stack fault, 0x040 is the stack
	 * fault bit.  We should only be taking one exception at a time,
	 * so if this combination doesn't produce any single exception,
686
	 * then we have a bad program that isn't synchronizing its FPU usage
L
Linus Torvalds 已提交
687 688 689 690 691
	 * and it will suffer the consequences since we won't be able to
	 * fully reproduce the context of the exception
	 */
	cwd = get_fpu_cwd(task);
	swd = get_fpu_swd(task);
692
	switch (swd & ~cwd & 0x3f) {
I
Ingo Molnar 已提交
693
	case 0x000: /* No unmasked exception */
694
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
695
		return;
696
#endif
697
	default: /* Multiple exceptions */
I
Ingo Molnar 已提交
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
		break;
	case 0x001: /* Invalid Op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		info.si_code = FPE_FLTINV;
		break;
	case 0x002: /* Denormalize */
	case 0x010: /* Underflow */
		info.si_code = FPE_FLTUND;
		break;
	case 0x004: /* Zero Divide */
		info.si_code = FPE_FLTDIV;
		break;
	case 0x008: /* Overflow */
		info.si_code = FPE_FLTOVF;
		break;
	case 0x020: /* Precision */
		info.si_code = FPE_FLTRES;
		break;
L
Linus Torvalds 已提交
720 721 722 723
	}
	force_sig_info(SIGFPE, &info, task);
}

724
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
725
{
726
	conditional_sti(regs);
727 728

#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
729
	ignore_fpu_irq = 1;
730 731 732 733 734 735
#else
	if (!user_mode(regs) &&
	    kernel_math_error(regs, "kernel x87 math error", 16))
		return;
#endif

736
	math_error((void __user *)regs->ip);
L
Linus Torvalds 已提交
737 738
}

739
/*
 * Save the FPU state of the current task, record trap 19 and force
 * SIGFPE with @ip as si_addr.  si_code is derived from the unmasked
 * exception bits in MXCSR and stays __SI_FAULT when no single
 * exception can be identified.
 */
static void simd_math_error(void __user *ip)
{
	struct task_struct *task;
	siginfo_t info;
	unsigned short mxcsr;

	/*
	 * Save the info for the exception handler and clear the error.
	 */
	task = current;
	save_init_fpu(task);
	task->thread.trap_no = 19;
	task->thread.error_code = 0;
	info.si_signo = SIGFPE;
	info.si_errno = 0;
	info.si_code = __SI_FAULT;
	info.si_addr = ip;
	/*
	 * The SIMD FPU exceptions are handled a little differently, as there
	 * is only a single status/control register.  Thus, to determine which
	 * unmasked exception was caught we must mask the exception mask bits
	 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
	 */
	mxcsr = get_fpu_mxcsr(task);
	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
	case 0x000:
	default:
		break;
	case 0x001: /* Invalid Op */
		info.si_code = FPE_FLTINV;
		break;
	case 0x002: /* Denormalize */
	case 0x010: /* Underflow */
		info.si_code = FPE_FLTUND;
		break;
	case 0x004: /* Zero Divide */
		info.si_code = FPE_FLTDIV;
		break;
	case 0x008: /* Overflow */
		info.si_code = FPE_FLTOVF;
		break;
	case 0x020: /* Precision */
		info.si_code = FPE_FLTRES;
		break;
	}
	force_sig_info(SIGFPE, &info, task);
}

787 788
dotraplinkage void
do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
L
Linus Torvalds 已提交
789
{
790 791
	conditional_sti(regs);

792
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
793 794 795
	if (cpu_has_xmm) {
		/* Handle SIMD FPU exceptions on PIII+ processors. */
		ignore_fpu_irq = 1;
796
		simd_math_error((void __user *)regs->ip);
I
Ingo Molnar 已提交
797 798 799 800 801 802
		return;
	}
	/*
	 * Handle strange cache flush from user space exception
	 * in all other cases.  This is undocumented behaviour.
	 */
803
	if (regs->flags & X86_VM_MASK) {
I
Ingo Molnar 已提交
804 805
		handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
		return;
L
Linus Torvalds 已提交
806
	}
I
Ingo Molnar 已提交
807 808 809 810
	current->thread.trap_no = 19;
	current->thread.error_code = error_code;
	die_if_kernel("cache flush denied", regs, error_code);
	force_sig(SIGSEGV, current);
811 812 813 814 815 816
#else
	if (!user_mode(regs) &&
			kernel_math_error(regs, "kernel simd math error", 19))
		return;
	simd_math_error((void __user *)regs->ip);
#endif
L
Linus Torvalds 已提交
817 818
}

819 820
/*
 * Handler for trap 15 (see trap_init()).  It only re-enables
 * interrupts conditionally; the former warning is compiled out.
 */
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
	conditional_sti(regs);
#if 0
	/* No need to warn about this any longer. */
	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}

829
#ifdef CONFIG_X86_32
I
Ingo Molnar 已提交
830
unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
L
Linus Torvalds 已提交
831
{
G
Glauber Costa 已提交
832
	struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
S
Stas Sergeev 已提交
833 834 835 836
	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
	unsigned long new_kesp = kesp - base;
	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
I
Ingo Molnar 已提交
837

S
Stas Sergeev 已提交
838
	/* Set up base for espfix segment */
I
Ingo Molnar 已提交
839 840
	desc &= 0x00f0ff0000000000ULL;
	desc |=	((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
S
Stas Sergeev 已提交
841 842 843 844
		((((__u64)base) << 32) & 0xff00000000000000ULL) |
		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
		(lim_pages & 0xffff);
	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
I
Ingo Molnar 已提交
845

S
Stas Sergeev 已提交
846
	return new_kesp;
L
Linus Torvalds 已提交
847
}
848 849 850 851 852 853 854 855 856
#else
/*
 * Weak, empty default for smp_thermal_interrupt().
 * NOTE(review): presumably replaced by a strong definition elsewhere
 * when thermal interrupt support is built in - confirm.
 */
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}

/*
 * Weak, empty default for mce_threshold_interrupt().
 * NOTE(review): presumably replaced by a strong definition elsewhere
 * when MCE threshold support is built in - confirm.
 */
asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
{
}
#endif
L
Linus Torvalds 已提交
857 858

/*
I
Ingo Molnar 已提交
859
 * 'math_state_restore()' saves the current math information in the
L
Linus Torvalds 已提交
860 861 862 863 864 865 866 867
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
868
asmlinkage void math_state_restore(void)
L
Linus Torvalds 已提交
869 870 871 872
{
	struct thread_info *thread = current_thread_info();
	struct task_struct *tsk = thread->task;

873 874 875 876 877 878 879 880 881 882 883 884 885 886 887
	if (!tsk_used_math(tsk)) {
		local_irq_enable();
		/*
		 * does a slab alloc which can sleep
		 */
		if (init_fpu(tsk)) {
			/*
			 * ran out of memory!
			 */
			do_group_exit(SIGKILL);
			return;
		}
		local_irq_disable();
	}

I
Ingo Molnar 已提交
888
	clts();				/* Allow maths ops (or we recurse) */
889
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
890
	restore_fpu(tsk);
891 892 893 894 895 896 897 898 899 900
#else
	/*
	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
	if (unlikely(restore_fpu_checking(tsk))) {
		stts();
		force_sig(SIGSEGV, tsk);
		return;
	}
#endif
L
Linus Torvalds 已提交
901
	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
902
	tsk->fpu_counter++;
L
Linus Torvalds 已提交
903
}
904
EXPORT_SYMBOL_GPL(math_state_restore);
L
Linus Torvalds 已提交
905 906 907 908

#ifndef CONFIG_MATH_EMULATION
/*
 * Stand-in used when CONFIG_MATH_EMULATION is not set: log that no
 * coprocessor or emulation is available, force SIGFPE on the current
 * task and call schedule().  @arg is unused.
 */
asmlinkage void math_emulate(long arg)
{
	printk(KERN_EMERG
		"math-emulation not enabled and no coprocessor found.\n");
	printk(KERN_EMERG "killing %s.\n", current->comm);
	force_sig(SIGFPE, current);
	schedule();
}
#endif /* CONFIG_MATH_EMULATION */

917 918
/*
 * Device-not-available (trap 7) handler.
 *
 * 32-bit: when CR0.EM is set the instruction is passed to
 * math_emulate(); otherwise the FPU state is restored while interrupts
 * are still off and they are re-enabled afterwards.
 * 64-bit: always restores the FPU state.
 */
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error)
{
#ifdef CONFIG_X86_32
	if (read_cr0() & X86_CR0_EM) {
		conditional_sti(regs);
		math_emulate(0);
	} else {
		math_state_restore(); /* interrupts still off */
		conditional_sti(regs);
	}
#else
	math_state_restore();
#endif
}

933
#ifdef CONFIG_X86_32
934
dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
935 936 937 938 939 940 941 942 943 944 945
{
	siginfo_t info;
	local_irq_enable();

	info.si_signo = SIGILL;
	info.si_errno = 0;
	info.si_code = ILL_BADSTK;
	info.si_addr = 0;
	if (notify_die(DIE_TRAP, "iret exception",
			regs, error_code, 32, SIGILL) == NOTIFY_STOP)
		return;
946
	do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
947
}
948
#endif
949

L
Linus Torvalds 已提交
950 951
/*
 * Boot-time setup of the exception gates.
 *
 * Probes for the EISA signature (CONFIG_EISA), installs the gates for
 * vectors 0-19 and the system call vector(s), enables OSFXSR and
 * OSXMMEXCPT in CR4 on capable 32-bit CPUs, marks the built-in and
 * syscall vectors as used (32-bit) and finally calls cpu_init().
 */
void __init trap_init(void)
{
#ifdef CONFIG_X86_32
	int i;
#endif

#ifdef CONFIG_EISA
	void __iomem *p = early_ioremap(0x0FFFD9, 4);

	/* The four bytes at 0x0FFFD9 spell "EISA" on EISA machines. */
	if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
		EISA_bus = 1;
	early_iounmap(p, 4);
#endif

	set_intr_gate(0, &divide_error);
	set_intr_gate_ist(1, &debug, DEBUG_STACK);
	set_intr_gate_ist(2, &nmi, NMI_STACK);
	/* int3 can be called from all */
	set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
	/* int4 can be called from all */
	set_system_intr_gate(4, &overflow);
	set_intr_gate(5, &bounds);
	set_intr_gate(6, &invalid_op);
	set_intr_gate(7, &device_not_available);
#ifdef CONFIG_X86_32
	set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
	set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
#endif
	set_intr_gate(9, &coprocessor_segment_overrun);
	set_intr_gate(10, &invalid_TSS);
	set_intr_gate(11, &segment_not_present);
	set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
	set_intr_gate(13, &general_protection);
	set_intr_gate(14, &page_fault);
	set_intr_gate(15, &spurious_interrupt_bug);
	set_intr_gate(16, &coprocessor_error);
	set_intr_gate(17, &alignment_check);
#ifdef CONFIG_X86_MCE
	set_intr_gate_ist(18, &machine_check, MCE_STACK);
#endif
	set_intr_gate(19, &simd_coprocessor_error);

#ifdef CONFIG_IA32_EMULATION
	set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif

#ifdef CONFIG_X86_32
	if (cpu_has_fxsr) {
		printk(KERN_INFO "Enabling fast FPU save and restore... ");
		set_in_cr4(X86_CR4_OSFXSR);
		printk("done.\n");
	}
	if (cpu_has_xmm) {
		printk(KERN_INFO
			"Enabling unmasked SIMD FPU exception support... ");
		set_in_cr4(X86_CR4_OSXMMEXCPT);
		printk("done.\n");
	}

	set_system_trap_gate(SYSCALL_VECTOR, &system_call);

	/* Reserve all the builtin and the syscall vector: */
	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
		set_bit(i, used_vectors);

	set_bit(SYSCALL_VECTOR, used_vectors);
#endif
	/*
	 * Should be a barrier for any external CPU state:
	 */
	cpu_init();

#ifdef CONFIG_X86_32
	trap_init_hook();
#endif
}