/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>

asmlinkage extern void ret_from_fork(void);

55
DEFINE_PER_CPU(unsigned long, old_rsp);
L
Linus Torvalds 已提交
56

57
/* Prints also some state that isn't saved in the pt_regs */
58
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
59 60
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
61
	unsigned long d0, d1, d2, d3, d6, d7;
62 63
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
64 65

	show_regs_common();
66
	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
67
	printk_address(regs->ip, 1);
68
	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
69
			regs->sp, regs->flags);
70
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
71
	       regs->ax, regs->bx, regs->cx);
72
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
73
	       regs->dx, regs->si, regs->di);
74
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
75
	       regs->bp, regs->r8, regs->r9);
76
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
77
	       regs->r10, regs->r11, regs->r12);
78
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
79
	       regs->r13, regs->r14, regs->r15);
L
Linus Torvalds 已提交
80

81 82 83
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
L
Linus Torvalds 已提交
84 85 86 87
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
88 89
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
L
Linus Torvalds 已提交
90

91 92
	if (!all)
		return;
L
Linus Torvalds 已提交
93

94 95 96 97
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();
L
Linus Torvalds 已提交
98

99
	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
100
	       fs, fsindex, gs, gsindex, shadowgs);
101
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
102
			es, cr0);
103
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
104
			cr4);
105 106 107 108

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
109
	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
110 111 112
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
113
	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
L
Linus Torvalds 已提交
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
}

/*
 * Sanity check on a dead task: if it still has an mm whose context
 * reports a non-zero LDT size, log the task name, LDT pointer and size,
 * then BUG().  Does nothing otherwise.
 */
void release_thread(struct task_struct *dead_task)
{
	/* No address space attached: nothing to verify. */
	if (!dead_task->mm)
		return;

	/* LDT size already zero: the expected state. */
	if (!dead_task->mm->context.size)
		return;

	printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
	       dead_task->comm,
	       dead_task->mm->context.ldt,
	       dead_task->mm->context.size);
	BUG();
}

/*
 * Fill TLS descriptor slot @tls of @t's thread.tls_array with a
 * segment based at @addr: 32-bit, limit 0xfffff counted in pages,
 * marked useable.
 *
 * @t:    task whose tls_array is updated.
 * @tls:  index into t->thread.tls_array.
 * @addr: 32-bit base address for the segment.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;

	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
R
Roland McGrath 已提交
145
	return get_desc_base(&t->thread.tls_array[tls]);
L
Linus Torvalds 已提交
146 147
}

A
Alexey Dobriyan 已提交
148
int copy_thread(unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
149
		unsigned long unused,
150
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
151 152
{
	int err;
153
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
154 155
	struct task_struct *me = current;

156
	childregs = ((struct pt_regs *)
A
Al Viro 已提交
157
			(THREAD_SIZE + task_stack_page(p))) - 1;
L
Linus Torvalds 已提交
158 159
	*childregs = *regs;

160
	childregs->ax = 0;
161 162 163
	if (user_mode(regs))
		childregs->sp = sp;
	else
164
		childregs->sp = (unsigned long)childregs;
L
Linus Torvalds 已提交
165

166 167 168
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;
L
Linus Torvalds 已提交
169

A
Al Viro 已提交
170
	set_tsk_thread_flag(p, TIF_FORK);
L
Linus Torvalds 已提交
171

172
	p->fpu_counter = 0;
173
	p->thread.io_bitmap_ptr = NULL;
L
Linus Torvalds 已提交
174

175
	savesegment(gs, p->thread.gsindex);
176
	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
177
	savesegment(fs, p->thread.fsindex);
178
	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
179 180
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
L
Linus Torvalds 已提交
181

182
	err = -ENOMEM;
183
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
184

185
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
186 187
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
L
Linus Torvalds 已提交
188 189 190 191
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
192
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
193
	}
L
Linus Torvalds 已提交
194 195 196 197 198 199 200

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
R
Roland McGrath 已提交
201
			err = do_set_thread_area(p, -1,
202
				(struct user_desc __user *)childregs->si, 0);
203 204 205 206
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
L
Linus Torvalds 已提交
207 208 209 210 211 212 213 214
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
215

L
Linus Torvalds 已提交
216 217 218
	return err;
}

219 220 221 222
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
I
Ingo Molnar 已提交
223
{
224
	loadsegment(fs, 0);
225 226
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
I
Ingo Molnar 已提交
227
	load_gs_index(0);
228
	current->thread.usersp	= new_sp;
I
Ingo Molnar 已提交
229 230
	regs->ip		= new_ip;
	regs->sp		= new_sp;
231
	this_cpu_write(old_rsp, new_sp);
232 233
	regs->cs		= _cs;
	regs->ss		= _ss;
234
	regs->flags		= X86_EFLAGS_IF;
235 236 237 238
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
239
}
240 241 242 243 244 245 246

/*
 * Start a 64-bit user program in the current task: enter at @new_ip
 * with stack @new_sp, using __USER_CS / __USER_DS for CS / SS and a
 * null selector for DS and ES.
 */
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	/* Last argument is the DS/ES selector: 0 for 64-bit tasks. */
	start_thread_common(regs, new_ip, new_sp, __USER_CS, __USER_DS, 0);
}
I
Ingo Molnar 已提交
247

248 249 250
#ifdef CONFIG_IA32_EMULATION
/*
 * Start a compat user program in the current task.  Tasks flagged
 * TIF_X32 run with __USER_CS, all others with __USER32_CS; SS, DS and
 * ES all use __USER_DS.
 */
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif
I
Ingo Molnar 已提交
257

L
Linus Torvalds 已提交
258 259 260
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
261
 * This could still be optimized:
L
Linus Torvalds 已提交
262 263
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
264 265
 *
 * Kprobes not supported here. Set the probe on schedule instead.
266
 * Function graph tracer not supported too.
L
Linus Torvalds 已提交
267
 */
268
__notrace_funcgraph struct task_struct *
269
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
270
{
271 272
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
273
	int cpu = smp_processor_id();
L
Linus Torvalds 已提交
274
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
275
	unsigned fsindex, gsindex;
276
	fpu_switch_t fpu;
277

278
	fpu = switch_fpu_prepare(prev_p, next_p, cpu);
279

L
Linus Torvalds 已提交
280 281 282
	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
283
	load_sp0(tss, next);
L
Linus Torvalds 已提交
284

285
	/*
L
Linus Torvalds 已提交
286 287 288
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
289
	savesegment(es, prev->es);
L
Linus Torvalds 已提交
290
	if (unlikely(next->es | prev->es))
291
		loadsegment(es, next->es);
292 293

	savesegment(ds, prev->ds);
L
Linus Torvalds 已提交
294 295 296
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

297 298 299 300 301 302 303 304 305

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

L
Linus Torvalds 已提交
306 307
	load_TLS(next, cpu);

308 309 310 311 312 313 314
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
315
	arch_end_context_switch(next_p);
316

317
	/*
L
Linus Torvalds 已提交
318
	 * Switch FS and GS.
319 320 321 322
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
L
Linus Torvalds 已提交
323
	 */
324 325
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
326
		/*
327 328 329 330 331
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
332
			prev->fs = 0;
L
Linus Torvalds 已提交
333
	}
334 335 336 337 338 339 340 341
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
342
			prev->gs = 0;
L
Linus Torvalds 已提交
343
	}
344 345 346
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;
L
Linus Torvalds 已提交
347

348 349
	switch_fpu_finish(next_p, fpu);

350
	/*
351
	 * Switch the PDA and FPU contexts.
L
Linus Torvalds 已提交
352
	 */
353 354 355
	prev->usersp = this_cpu_read(old_rsp);
	this_cpu_write(old_rsp, next->usersp);
	this_cpu_write(current_task, next_p);
356

357
	this_cpu_write(kernel_stack,
358
		  (unsigned long)task_stack_page(next_p) +
359
		  THREAD_SIZE - KERNEL_STACK_OFFSET);
L
Linus Torvalds 已提交
360 361

	/*
362
	 * Now maybe reload the debug registers and handle I/O bitmaps
L
Linus Torvalds 已提交
363
	 */
364 365
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
366
		__switch_to_xtra(prev_p, next_p, tss);
L
Linus Torvalds 已提交
367 368 369 370 371 372 373 374 375

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
376
	clear_thread_flag(TIF_IA32);
377
	clear_thread_flag(TIF_ADDR32);
378
	clear_thread_flag(TIF_X32);
L
Linus Torvalds 已提交
379

380 381 382 383
	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

L
Linus Torvalds 已提交
384 385 386
	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
387
	   32bit childs are affected again. */
L
Linus Torvalds 已提交
388 389 390
	current->personality &= ~READ_IMPLIES_EXEC;
}

H
H. Peter Anvin 已提交
391
/*
 * Put the current task into a 32-bit address-space mode.
 *
 * @x32: true selects x32 (TIF_X32 set, TIF_IA32 cleared,
 *       READ_IMPLIES_EXEC dropped, TS_COMPAT cleared); false selects
 *       ia32 (TIF_IA32 set, TIF_X32 cleared, force_personality32 OR-ed
 *       into the personality, TS_COMPAT set).
 *
 * In both cases TIF_ADDR32 is set and the task's mm, if any, is marked
 * ia32_compat.
 */
void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (current->mm)
		current->mm->context.ia32_compat = 1;

	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		current->personality &= ~READ_IMPLIES_EXEC;
		/* is_compat_task() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
418

L
Linus Torvalds 已提交
419 420 421
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
422
	u64 fp, ip;
L
Linus Torvalds 已提交
423 424
	int count = 0;

425 426
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
427
	stack = (unsigned long)task_stack_page(p);
428
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
L
Linus Torvalds 已提交
429
		return 0;
430
	fp = *(u64 *)(p->thread.sp);
431
	do {
432
		if (fp < (unsigned long)stack ||
433
		    fp >= (unsigned long)stack+THREAD_SIZE)
434
			return 0;
435 436 437
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
438 439
		fp = *(u64 *)fp;
	} while (count++ < 16);
L
Linus Torvalds 已提交
440 441 442 443
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
444 445
{
	int ret = 0;
L
Linus Torvalds 已提交
446 447 448
	int doit = task == current;
	int cpu;

449
	switch (code) {
L
Linus Torvalds 已提交
450
	case ARCH_SET_GS:
451
		if (addr >= TASK_SIZE_OF(task))
452
			return -EPERM;
L
Linus Torvalds 已提交
453
		cpu = get_cpu();
454
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
455
		   switch. */
456 457 458
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
L
Linus Torvalds 已提交
459
				load_TLS(&task->thread, cpu);
460
				load_gs_index(GS_TLS_SEL);
L
Linus Torvalds 已提交
461
			}
462
			task->thread.gsindex = GS_TLS_SEL;
L
Linus Torvalds 已提交
463
			task->thread.gs = 0;
464
		} else {
L
Linus Torvalds 已提交
465 466 467
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
468
				load_gs_index(0);
469
				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
470
			}
L
Linus Torvalds 已提交
471 472 473 474 475 476
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
477
		if (addr >= TASK_SIZE_OF(task))
478
			return -EPERM;
L
Linus Torvalds 已提交
479
		cpu = get_cpu();
480
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
481
		   switch. */
482
		if (addr <= 0xffffffff) {
L
Linus Torvalds 已提交
483
			set_32bit_tls(task, FS_TLS, addr);
484 485
			if (doit) {
				load_TLS(&task->thread, cpu);
486
				loadsegment(fs, FS_TLS_SEL);
L
Linus Torvalds 已提交
487 488 489
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
490
		} else {
L
Linus Torvalds 已提交
491 492 493 494 495
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
496
				loadsegment(fs, 0);
497
				ret = wrmsrl_safe(MSR_FS_BASE, addr);
L
Linus Torvalds 已提交
498 499 500 501
			}
		}
		put_cpu();
		break;
502 503
	case ARCH_GET_FS: {
		unsigned long base;
L
Linus Torvalds 已提交
504 505
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
506
		else if (doit)
L
Linus Torvalds 已提交
507
			rdmsrl(MSR_FS_BASE, base);
508
		else
L
Linus Torvalds 已提交
509
			base = task->thread.fs;
510 511
		ret = put_user(base, (unsigned long __user *)addr);
		break;
L
Linus Torvalds 已提交
512
	}
513
	case ARCH_GET_GS: {
L
Linus Torvalds 已提交
514
		unsigned long base;
515
		unsigned gsindex;
L
Linus Torvalds 已提交
516 517
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
518
		else if (doit) {
519
			savesegment(gs, gsindex);
520 521 522 523
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
524
		} else
L
Linus Torvalds 已提交
525
			base = task->thread.gs;
526
		ret = put_user(base, (unsigned long __user *)addr);
L
Linus Torvalds 已提交
527 528 529 530 531 532
		break;
	}

	default:
		ret = -EINVAL;
		break;
533
	}
L
Linus Torvalds 已提交
534

535 536
	return ret;
}
L
Linus Torvalds 已提交
537 538 539 540 541 542

/*
 * arch_prctl entry point: applies do_arch_prctl() to the calling task
 * and hands back its result unchanged.
 */
long sys_arch_prctl(int code, unsigned long addr)
{
	long result = do_arch_prctl(current, code, addr);

	return result;
}

543 544 545 546 547
/*
 * Return the user stack pointer recorded for @task: the sp field of
 * its saved pt_regs when the task has TIF_IA32 set, otherwise
 * thread.usersp.
 */
unsigned long KSTK_ESP(struct task_struct *task)
{
	if (test_tsk_thread_flag(task, TIF_IA32))
		return task_pt_regs(task)->sp;

	return task->thread.usersp;
}