process_64.c 20.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
6
 *
L
Linus Torvalds 已提交
7 8
 *  X86-64 port
 *	Andi Kleen.
A
Ashok Raj 已提交
9 10
 *
 *	CPU hotplug support - ashok.raj@intel.com
L
Linus Torvalds 已提交
11 12 13 14 15 16 17 18
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

19
#include <linux/stackprotector.h>
A
Ashok Raj 已提交
20
#include <linux/cpu.h>
L
Linus Torvalds 已提交
21 22
#include <linux/errno.h>
#include <linux/sched.h>
23
#include <linux/fs.h>
L
Linus Torvalds 已提交
24 25 26 27 28 29 30
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
31
#include <linux/utsname.h>
L
Linus Torvalds 已提交
32
#include <linux/delay.h>
33
#include <linux/module.h>
L
Linus Torvalds 已提交
34 35
#include <linux/ptrace.h>
#include <linux/random.h>
A
Andi Kleen 已提交
36
#include <linux/notifier.h>
37
#include <linux/kprobes.h>
38
#include <linux/kdebug.h>
39
#include <linux/tick.h>
40
#include <linux/prctl.h>
41 42
#include <linux/uaccess.h>
#include <linux/io.h>
43
#include <linux/ftrace.h>
44
#include <linux/dmi.h>
L
Linus Torvalds 已提交
45 46 47 48 49 50 51 52 53 54

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
A
Andi Kleen 已提交
55
#include <asm/idle.h>
56
#include <asm/syscalls.h>
57
#include <asm/ds.h>
L
Linus Torvalds 已提交
58 59 60

/*
 * Declared here, defined outside this file.
 * NOTE(review): presumably the return path taken by a newly forked
 * child; it is not referenced by the code visible in this file.
 */
asmlinkage extern void ret_from_fork(void);

61 62 63
/*
 * Per-CPU pointer to the task running on that CPU.  It is statically
 * initialised to &init_task and rewritten by __switch_to() on every
 * context switch.  The symbol is exported.
 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

64
/*
 * Per-CPU copy of the user stack pointer: written by start_thread() and
 * swapped against thread.usersp in __switch_to().
 */
DEFINE_PER_CPU(unsigned long, old_rsp);
65
/* Per-CPU flag: set by enter_idle(), test-and-cleared by __exit_idle(). */
static DEFINE_PER_CPU(unsigned char, is_idle);
66

L
Linus Torvalds 已提交
67 68
/*
 * Clone flag set CLONE_VM | CLONE_UNTRACED.
 * NOTE(review): presumably consumed when creating kernel threads; the
 * user of this variable is not visible in this file.
 */
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

69
/* Notifier chain called with IDLE_START / IDLE_END by the idle code below. */
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
A
Andi Kleen 已提交
70 71 72

void idle_notifier_register(struct notifier_block *n)
{
73
	atomic_notifier_chain_register(&idle_notifier, n);
A
Andi Kleen 已提交
74
}
75 76 77 78 79 80 81
EXPORT_SYMBOL_GPL(idle_notifier_register);

/* Remove @n from the idle notifier chain. */
void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
A
Andi Kleen 已提交
82 83 84

void enter_idle(void)
{
85
	percpu_write(is_idle, 1);
86
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
A
Andi Kleen 已提交
87 88 89 90
}

static void __exit_idle(void)
{
91
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
A
Andi Kleen 已提交
92
		return;
93
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
A
Andi Kleen 已提交
94 95 96 97 98
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
A
Andi Kleen 已提交
99 100
	/* idle loop has pid 0 */
	if (current->pid)
A
Andi Kleen 已提交
101 102 103 104
		return;
	__exit_idle();
}

A
Alex Nixon 已提交
105
#ifndef CONFIG_SMP
/*
 * Stub for builds without CONFIG_SMP: cpu_idle() only calls this for an
 * offline CPU, and reaching it here is treated as a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif
A
Ashok Raj 已提交
111

L
Linus Torvalds 已提交
112 113 114 115 116 117
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
P
Pavel Machek 已提交
118
void cpu_idle(void)
L
Linus Torvalds 已提交
119
{
120
	current_thread_info()->status |= TS_POLLING;
121 122

	/*
T
Tejun Heo 已提交
123 124 125 126 127
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
128
	 */
129 130
	boot_init_stack_canary();

L
Linus Torvalds 已提交
131 132
	/* endless idle loop with no priority at all */
	while (1) {
133
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
134 135 136
		while (!need_resched()) {

			rmb();
T
Thomas Gleixner 已提交
137

A
Ashok Raj 已提交
138 139
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
140 141 142 143 144 145
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
A
Andi Kleen 已提交
146
			enter_idle();
147 148
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
149
			pm_idle();
150
			start_critical_timings();
A
Andi Kleen 已提交
151 152 153
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
A
Andi Kleen 已提交
154
			__exit_idle();
L
Linus Torvalds 已提交
155 156
		}

157
		tick_nohz_restart_sched_tick();
158
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
159
		schedule();
160
		preempt_disable();
L
Linus Torvalds 已提交
161 162 163
	}
}

164
/* Prints also some state that isn't saved in the pt_regs */
165
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
166 167
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
168
	unsigned long d0, d1, d2, d3, d6, d7;
169 170
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
171
	const char *board;
L
Linus Torvalds 已提交
172 173 174

	printk("\n");
	print_modules();
175 176 177 178
	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
179
		current->pid, current->comm, print_tainted(),
180 181
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
182
		init_utsname()->version, board);
183
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
184
	printk_address(regs->ip, 1);
185 186 187
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
188
	       regs->ax, regs->bx, regs->cx);
189
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
190
	       regs->dx, regs->si, regs->di);
191
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
192
	       regs->bp, regs->r8, regs->r9);
193
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
194
	       regs->r10, regs->r11, regs->r12);
195
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
196
	       regs->r13, regs->r14, regs->r15);
L
Linus Torvalds 已提交
197

198 199 200
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
L
Linus Torvalds 已提交
201 202 203 204
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
205 206
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
L
Linus Torvalds 已提交
207

208 209
	if (!all)
		return;
L
Linus Torvalds 已提交
210

211 212 213 214
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();
L
Linus Torvalds 已提交
215

216
	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
217
	       fs, fsindex, gs, gsindex, shadowgs);
218 219 220 221
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);
222 223 224 225

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
226
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
227 228 229
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
230
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
L
Linus Torvalds 已提交
231 232 233 234
}

void show_regs(struct pt_regs *regs)
{
235
	printk(KERN_INFO "CPU %d:", smp_processor_id());
236
	__show_regs(regs, 1);
237
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
L
Linus Torvalds 已提交
238 239 240 241 242 243 244 245 246
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
247

248
	if (me->thread.io_bitmap_ptr) {
L
Linus Torvalds 已提交
249 250 251 252
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
253
		clear_thread_flag(TIF_IO_BITMAP);
L
Linus Torvalds 已提交
254 255 256 257 258 259 260
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
261 262

	ds_exit_thread(current);
L
Linus Torvalds 已提交
263 264 265 266 267 268
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

269 270 271 272 273 274
	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
275
			current_thread_info()->status |= TS_COMPAT;
276
		}
277
	}
278
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
L
Linus Torvalds 已提交
279 280 281 282 283 284 285

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
286
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
L
Linus Torvalds 已提交
287 288 289
	/*
	 * Forget coprocessor state..
	 */
290
	tsk->fpu_counter = 0;
L
Linus Torvalds 已提交
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

/*
 * Fill TLS slot @tls of task @t with a 32-bit segment descriptor whose
 * base is @addr and whose limit is 0xfffff pages.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};

	fill_ldt(&t->thread.tls_array[tls], &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
R
Roland McGrath 已提交
324
	return get_desc_base(&t->thread.tls_array[tls]);
L
Linus Torvalds 已提交
325 326 327 328 329 330 331 332 333 334 335
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * NOTE(review): unlazy_fpu() presumably writes lazily-held FPU state
 * back to @tsk so that the copy sees it; confirm against asm/i387.h.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

336
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
337
		unsigned long unused,
338
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
339 340
{
	int err;
341
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
342 343
	struct task_struct *me = current;

344
	childregs = ((struct pt_regs *)
A
Al Viro 已提交
345
			(THREAD_SIZE + task_stack_page(p))) - 1;
L
Linus Torvalds 已提交
346 347
	*childregs = *regs;

348 349 350 351
	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;
L
Linus Torvalds 已提交
352

353 354 355
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;
L
Linus Torvalds 已提交
356

A
Al Viro 已提交
357
	set_tsk_thread_flag(p, TIF_FORK);
L
Linus Torvalds 已提交
358 359 360 361

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

362 363 364 365
	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
L
Linus Torvalds 已提交
366

367
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
L
Linus Torvalds 已提交
368 369 370 371 372
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
373 374
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
375
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
376
	}
L
Linus Torvalds 已提交
377 378 379 380 381 382 383

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
R
Roland McGrath 已提交
384
			err = do_set_thread_area(p, -1,
385
				(struct user_desc __user *)childregs->si, 0);
386 387 388 389
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
L
Linus Torvalds 已提交
390 391
			goto out;
	}
392 393 394 395 396 397

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
398 399 400 401 402 403 404 405 406
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

I
Ingo Molnar 已提交
407 408 409
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
410 411 412
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
I
Ingo Molnar 已提交
413 414 415
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
416
	percpu_write(old_rsp, new_sp);
I
Ingo Molnar 已提交
417 418 419 420
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;
	set_fs(USER_DS);
421 422 423 424
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
425 426 427
}
EXPORT_SYMBOL_GPL(start_thread);

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/* Set X86_CR4_TSD in this CPU's CR4. */
static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

/*
 * Set TIF_NOTSC on the current thread and, if it was not already set,
 * update CR4 to match.
 */
void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC)) {
		/*
		 * The CPU state has to change together with TIF_NOTSC,
		 * in the context that is currently running.
		 */
		hard_disable_TSC();
	}
	preempt_enable();
}

/* Clear X86_CR4_TSD in this CPU's CR4. */
static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

I
Ingo Molnar 已提交
450
static void enable_TSC(void)
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

/*
 * Select the current thread's TSC mode.  @val must be PR_TSC_SIGSEGV or
 * PR_TSC_ENABLE; anything else yields -EINVAL.  Returns 0 on success.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		return 0;
	case PR_TSC_ENABLE:
		enable_TSC();
		return 0;
	default:
		return -EINVAL;
	}
}

L
Linus Torvalds 已提交
486 487 488
/*
 * Load debug register @r from the debugreg<r> field of the
 * thread_struct that @thread points to.  @thread is parenthesised so
 * that any pointer expression can be passed safely.
 */
#define loaddebug(thread, r) set_debugreg((thread)->debugreg ## r, r)

491
static inline void __switch_to_xtra(struct task_struct *prev_p,
492 493
				    struct task_struct *next_p,
				    struct tss_struct *tss)
494 495 496 497 498 499
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread,
	next = &next_p->thread;

500 501 502 503
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
504
		update_debugctlmsr(next->debugctlmsr);
R
Roland McGrath 已提交
505

506 507 508 509 510 511 512 513 514 515
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

516 517 518 519 520 521 522 523 524
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

L
Linus Torvalds 已提交
540 541 542
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
543
 * This could still be optimized:
L
Linus Torvalds 已提交
544 545
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
546 547
 *
 * Kprobes not supported here. Set the probe on schedule instead.
548
 * Function graph tracer not supported too.
L
Linus Torvalds 已提交
549
 */
550
__notrace_funcgraph struct task_struct *
551
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
552
{
553 554
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
555
	int cpu = smp_processor_id();
L
Linus Torvalds 已提交
556
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
557
	unsigned fsindex, gsindex;
L
Linus Torvalds 已提交
558

559
	/* we're going to use this soon, after a few expensive things */
560
	if (next_p->fpu_counter > 5)
561
		prefetch(next->xstate);
562

L
Linus Torvalds 已提交
563 564 565
	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
566
	load_sp0(tss, next);
L
Linus Torvalds 已提交
567

568
	/*
L
Linus Torvalds 已提交
569 570 571
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
572
	savesegment(es, prev->es);
L
Linus Torvalds 已提交
573
	if (unlikely(next->es | prev->es))
574
		loadsegment(es, next->es);
575 576

	savesegment(ds, prev->ds);
L
Linus Torvalds 已提交
577 578 579
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

580 581 582 583 584 585 586 587 588

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

L
Linus Torvalds 已提交
589 590
	load_TLS(next, cpu);

591 592 593 594 595 596 597 598 599
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

600
	/*
L
Linus Torvalds 已提交
601
	 * Switch FS and GS.
602 603 604 605
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
L
Linus Torvalds 已提交
606
	 */
607 608
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
609
		/*
610 611 612 613 614
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
615
			prev->fs = 0;
L
Linus Torvalds 已提交
616
	}
617 618 619 620 621 622 623 624
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
625
			prev->gs = 0;
L
Linus Torvalds 已提交
626
	}
627 628 629
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;
L
Linus Torvalds 已提交
630

A
Andi Kleen 已提交
631 632 633
	/* Must be after DS reload */
	unlazy_fpu(prev_p);

634
	/*
635
	 * Switch the PDA and FPU contexts.
L
Linus Torvalds 已提交
636
	 */
637 638
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
639
	percpu_write(current_task, next_p);
640

641
	percpu_write(kernel_stack,
642
		  (unsigned long)task_stack_page(next_p) +
643
		  THREAD_SIZE - KERNEL_STACK_OFFSET);
L
Linus Torvalds 已提交
644 645

	/*
646
	 * Now maybe reload the debug registers and handle I/O bitmaps
L
Linus Torvalds 已提交
647
	 */
648 649
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
650
		__switch_to_xtra(prev_p, next_p, tss);
L
Linus Torvalds 已提交
651

652 653 654
	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
655 656 657
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
658
	 */
659
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
660
		math_state_restore();
L
Linus Torvalds 已提交
661 662 663 664 665 666
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
667
asmlinkage
L
Linus Torvalds 已提交
668
long sys_execve(char __user *name, char __user * __user *argv,
669
		char __user * __user *envp, struct pt_regs *regs)
L
Linus Torvalds 已提交
670 671
{
	long error;
672
	char *filename;
L
Linus Torvalds 已提交
673 674 675

	filename = getname(name);
	error = PTR_ERR(filename);
676
	if (IS_ERR(filename))
L
Linus Torvalds 已提交
677
		return error;
678
	error = do_execve(filename, argv, envp, regs);
L
Linus Torvalds 已提交
679 680 681 682 683 684 685 686 687
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
688
	clear_thread_flag(TIF_IA32);
L
Linus Torvalds 已提交
689 690 691 692

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
693
	   32bit childs are affected again. */
L
Linus Torvalds 已提交
694 695 696 697 698
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
699
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
700 701
}

702 703 704
/*
 * clone(): a @newsp of zero means the child keeps using the caller's
 * current stack pointer.
 */
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	unsigned long child_sp = newsp ? newsp : regs->sp;

	return do_fork(clone_flags, child_sp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
723
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
L
Linus Torvalds 已提交
724 725 726 727 728 729
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
730
	u64 fp, ip;
L
Linus Torvalds 已提交
731 732
	int count = 0;

733 734
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
735
	stack = (unsigned long)task_stack_page(p);
736
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
L
Linus Torvalds 已提交
737
		return 0;
738
	fp = *(u64 *)(p->thread.sp);
739
	do {
740
		if (fp < (unsigned long)stack ||
741
		    fp >= (unsigned long)stack+THREAD_SIZE)
742
			return 0;
743 744 745
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
746 747
		fp = *(u64 *)fp;
	} while (count++ < 16);
L
Linus Torvalds 已提交
748 749 750 751
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
752 753
{
	int ret = 0;
L
Linus Torvalds 已提交
754 755 756
	int doit = task == current;
	int cpu;

757
	switch (code) {
L
Linus Torvalds 已提交
758
	case ARCH_SET_GS:
759
		if (addr >= TASK_SIZE_OF(task))
760
			return -EPERM;
L
Linus Torvalds 已提交
761
		cpu = get_cpu();
762
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
763
		   switch. */
764 765 766
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
L
Linus Torvalds 已提交
767
				load_TLS(&task->thread, cpu);
768
				load_gs_index(GS_TLS_SEL);
L
Linus Torvalds 已提交
769
			}
770
			task->thread.gsindex = GS_TLS_SEL;
L
Linus Torvalds 已提交
771
			task->thread.gs = 0;
772
		} else {
L
Linus Torvalds 已提交
773 774 775
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
776 777
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
778
			}
L
Linus Torvalds 已提交
779 780 781 782 783 784
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
785
		if (addr >= TASK_SIZE_OF(task))
786
			return -EPERM;
L
Linus Torvalds 已提交
787
		cpu = get_cpu();
788
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
789
		   switch. */
790
		if (addr <= 0xffffffff) {
L
Linus Torvalds 已提交
791
			set_32bit_tls(task, FS_TLS, addr);
792 793
			if (doit) {
				load_TLS(&task->thread, cpu);
794
				loadsegment(fs, FS_TLS_SEL);
L
Linus Torvalds 已提交
795 796 797
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
798
		} else {
L
Linus Torvalds 已提交
799 800 801 802 803
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
804
				loadsegment(fs, 0);
805
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
L
Linus Torvalds 已提交
806 807 808 809
			}
		}
		put_cpu();
		break;
810 811
	case ARCH_GET_FS: {
		unsigned long base;
L
Linus Torvalds 已提交
812 813
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
814
		else if (doit)
L
Linus Torvalds 已提交
815
			rdmsrl(MSR_FS_BASE, base);
816
		else
L
Linus Torvalds 已提交
817
			base = task->thread.fs;
818 819
		ret = put_user(base, (unsigned long __user *)addr);
		break;
L
Linus Torvalds 已提交
820
	}
821
	case ARCH_GET_GS: {
L
Linus Torvalds 已提交
822
		unsigned long base;
823
		unsigned gsindex;
L
Linus Torvalds 已提交
824 825
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
826
		else if (doit) {
827
			savesegment(gs, gsindex);
828 829 830 831
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
832
		} else
L
Linus Torvalds 已提交
833
			base = task->thread.gs;
834
		ret = put_user(base, (unsigned long __user *)addr);
L
Linus Torvalds 已提交
835 836 837 838 839 840
		break;
	}

	default:
		ret = -EINVAL;
		break;
841
	}
L
Linus Torvalds 已提交
842

843 844
	return ret;
}
L
Linus Torvalds 已提交
845 846 847 848 849 850 851 852

/* arch_prctl() system call: do_arch_prctl() applied to the current task. */
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

unsigned long arch_align_stack(unsigned long sp)
{
853
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
854 855 856
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
J
Jiri Kosina 已提交
857 858 859 860 861 862

/*
 * Pick a randomized brk in the 0x02000000-byte window that starts at
 * mm->brk.  Falls back to mm->brk if randomize_range() returns 0.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long window_end = mm->brk + 0x02000000;
	unsigned long brk = randomize_range(mm->brk, window_end, 0);

	return brk ? brk : mm->brk;
}