process_32.c 17.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

T
Tejun Heo 已提交
14
#include <linux/stackprotector.h>
Z
Zwane Mwaikambo 已提交
15
#include <linux/cpu.h>
L
Linus Torvalds 已提交
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
37
#include <linux/personality.h>
I
Ingo Molnar 已提交
38
#include <linux/tick.h>
39
#include <linux/percpu.h>
40
#include <linux/prctl.h>
41
#include <linux/dmi.h>
42
#include <linux/ftrace.h>
43 44 45
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
46 47 48 49 50 51 52 53 54 55 56 57 58

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

Z
Zwane Mwaikambo 已提交
59 60
#include <asm/tlbflush.h>
#include <asm/cpu.h>
61
#include <asm/idle.h>
62
#include <asm/syscalls.h>
63
#include <asm/ds.h>
Z
Zwane Mwaikambo 已提交
64

L
Linus Torvalds 已提交
65 66
/*
 * Assembly entry point for newly created threads: copy_thread() below
 * stores its address in the child's thread.ip.  The __asm__ label fixes
 * the symbol name to "ret_from_fork".
 */
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

67 68 69
/*
 * Per-CPU pointer to a task_struct, initialised to &init_task.
 * __switch_to() below writes the incoming task into it with
 * percpu_write().
 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

L
Linus Torvalds 已提交
70 71 72 73 74
/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
75
	return ((unsigned long *)tsk->thread.sp)[3];
L
Linus Torvalds 已提交
76 77
}

A
Alex Nixon 已提交
78 79 80 81 82 83 84
#ifndef CONFIG_SMP
/*
 * Stub used when CONFIG_SMP is not set.  cpu_idle() calls play_dead() when
 * cpu_is_offline() reports its CPU as offline; this version just hits BUG().
 */
static inline void play_dead(void)
{
	BUG();
}
#endif

L
Linus Torvalds 已提交
85 86 87 88 89 90
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
Z
Zwane Mwaikambo 已提交
91
void cpu_idle(void)
L
Linus Torvalds 已提交
92
{
93
	int cpu = smp_processor_id();
Z
Zwane Mwaikambo 已提交
94

T
Tejun Heo 已提交
95 96 97 98 99 100 101 102 103
	/*
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
	 */
	boot_init_stack_canary();

104
	current_thread_info()->status |= TS_POLLING;
105

L
Linus Torvalds 已提交
106 107
	/* endless idle loop with no priority at all */
	while (1) {
108
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
109 110
		while (!need_resched()) {

C
Christoph Lameter 已提交
111
			check_pgt_cache();
L
Linus Torvalds 已提交
112 113
			rmb();

114 115 116
			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

Z
Zwane Mwaikambo 已提交
117 118 119
			if (cpu_is_offline(cpu))
				play_dead();

120
			local_irq_disable();
121 122
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
123
			pm_idle();
124
			start_critical_timings();
L
Linus Torvalds 已提交
125
		}
I
Ingo Molnar 已提交
126
		tick_nohz_restart_sched_tick();
127
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
128
		schedule();
129
		preempt_disable();
L
Linus Torvalds 已提交
130 131 132
	}
}

133
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
134 135
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
136
	unsigned long d0, d1, d2, d3, d6, d7;
137
	unsigned long sp;
138
	unsigned short ss, gs;
139
	const char *board;
140 141

	if (user_mode_vm(regs)) {
142 143
		sp = regs->sp;
		ss = regs->ss & 0xffff;
T
Tejun Heo 已提交
144
		gs = get_user_gs(regs);
145
	} else {
146
		sp = (unsigned long) (&regs->sp);
147 148 149
		savesegment(ss, ss);
		savesegment(gs, gs);
	}
L
Linus Torvalds 已提交
150 151

	printk("\n");
152 153 154 155 156

	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
157
			task_pid_nr(current), current->comm,
158 159
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
160
			init_utsname()->version, board);
161 162

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
163
			(u16)regs->cs, regs->ip, regs->flags,
164
			smp_processor_id());
165
	print_symbol("EIP is at %s\n", regs->ip);
L
Linus Torvalds 已提交
166 167

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
168
		regs->ax, regs->bx, regs->cx, regs->dx);
169
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
170
		regs->si, regs->di, regs->bp, sp);
171
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
172
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
173 174 175

	if (!all)
		return;
L
Linus Torvalds 已提交
176

177 178 179
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
180
	cr4 = read_cr4_safe();
181 182
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);
183 184 185 186 187 188 189

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);
190

191 192
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
193 194 195
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}
196

197 198
void show_regs(struct pt_regs *regs)
{
199
	__show_regs(regs, 1);
200
	show_trace(NULL, regs, &regs->sp, regs->bp);
L
Linus Torvalds 已提交
201 202 203
}

/*
 * This gets run with %bx containing the function to call, and %dx
 * containing the "args".  kernel_thread() below fills in both registers
 * and points the new thread's ip at this helper.
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
213
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
L
Linus Torvalds 已提交
214 215 216 217 218
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

219 220
	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;
L
Linus Torvalds 已提交
221

222 223 224
	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
225
	regs.gs = __KERNEL_STACK_CANARY;
226 227 228 229
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
L
Linus Torvalds 已提交
230 231

	/* Ok, create the new process.. */
232
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
233
}
234
EXPORT_SYMBOL(kernel_thread);
L
Linus Torvalds 已提交
235 236 237 238 239 240 241

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
242 243 244
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
L
Linus Torvalds 已提交
245 246 247 248 249
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
250
		clear_thread_flag(TIF_IO_BITMAP);
L
Linus Torvalds 已提交
251 252 253 254 255 256 257
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
258
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
259 260
		put_cpu();
	}
261 262

	ds_exit_thread(current);
L
Linus Torvalds 已提交
263 264 265 266 267 268
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

269 270 271 272 273 274
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
275
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
276
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
L
Linus Torvalds 已提交
277 278 279
	/*
	 * Forget coprocessor state..
	 */
280
	tsk->fpu_counter = 0;
L
Linus Torvalds 已提交
281 282 283 284 285 286
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
287
	BUG_ON(dead_task->mm);
L
Linus Torvalds 已提交
288 289 290 291 292 293 294 295 296 297 298 299
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.  The only work done here is a call to
 * unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

300
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
301
	unsigned long unused,
302
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
303
{
304
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
305 306 307
	struct task_struct *tsk;
	int err;

A
akpm@osdl.org 已提交
308
	childregs = task_pt_regs(p);
309
	*childregs = *regs;
310 311
	childregs->ax = 0;
	childregs->sp = sp;
312

313 314
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
L
Linus Torvalds 已提交
315

316
	p->thread.ip = (unsigned long) ret_from_fork;
L
Linus Torvalds 已提交
317

T
Tejun Heo 已提交
318
	task_user_gs(p) = get_user_gs(regs);
L
Linus Torvalds 已提交
319 320

	tsk = current;
321
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
A
Alexey Dobriyan 已提交
322 323
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
L
Linus Torvalds 已提交
324 325 326 327
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
328
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
L
Linus Torvalds 已提交
329 330
	}

R
Roland McGrath 已提交
331 332
	err = 0;

L
Linus Torvalds 已提交
333 334 335
	/*
	 * Set a new TLS for the child thread?
	 */
R
Roland McGrath 已提交
336 337
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
338
			(struct user_desc __user *)childregs->si, 0);
L
Linus Torvalds 已提交
339 340 341 342 343

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
344 345 346 347 348 349

	ds_copy_thread(p, current);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
350 351 352
	return err;
}

I
Ingo Molnar 已提交
353 354 355
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
T
Tejun Heo 已提交
356
	set_user_gs(regs, 0);
I
Ingo Molnar 已提交
357 358 359 360 361 362 363 364
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
365 366 367 368
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
369 370 371
}
EXPORT_SYMBOL_GPL(start_thread);

372
static void hard_disable_TSC(void)
373 374 375
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
376

377 378 379 380 381 382 383 384 385 386 387
/*
 * Set TIF_NOTSC for the current thread and, if it was not already set,
 * set the TSD bit in CR4 to match.
 */
void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC)) {
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	}
	preempt_enable();
}
388

389
static void hard_enable_TSC(void)
390 391 392
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
393

I
Ingo Molnar 已提交
394
static void enable_TSC(void)
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

/*
 * Store the current thread's TSC mode at user address @adr:
 * PR_TSC_SIGSEGV when TIF_NOTSC is set, PR_TSC_ENABLE otherwise.
 * Returns the result of put_user().
 */
int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	val = test_thread_flag(TIF_NOTSC) ? PR_TSC_SIGSEGV : PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

/*
 * Switch the current thread's TSC mode.
 *
 * @val: PR_TSC_SIGSEGV to disable the TSC, PR_TSC_ENABLE to enable it.
 *
 * Returns 0 on success, -EINVAL for any other @val.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		break;
	case PR_TSC_ENABLE:
		enable_TSC();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
429 430 431 432

static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
L
Linus Torvalds 已提交
433
{
R
Roland McGrath 已提交
434
	struct thread_struct *prev, *next;
435

R
Roland McGrath 已提交
436
	prev = &prev_p->thread;
437 438
	next = &next_p->thread;

439 440 441 442
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
443
		update_debugctlmsr(next->debugctlmsr);
R
Roland McGrath 已提交
444

445
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
446 447 448 449
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
450
		/* no 4 and 5 */
451 452
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
453 454
	}

455 456 457 458 459 460 461 462 463
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

464
	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
L
Linus Torvalds 已提交
465 466 467 468
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
469
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
470 471
		return;
	}
472

L
Linus Torvalds 已提交
473 474 475 476 477 478
	if (likely(next == tss->io_bitmap_owner)) {
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we dont have to do anything but
		 * to set a valid offset in the TSS:
		 */
479
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
480 481 482 483 484 485 486 487 488 489 490
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and we let the task to get a GPF in case an I/O instruction
	 * is performed.  The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, it true, does the
	 * real copy and restart the instruction.  This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
491
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
L
Linus Torvalds 已提交
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
517
 * The return value (in %ax) will be the "prev" task after
L
Linus Torvalds 已提交
518 519 520
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
521 522
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
523 524 525 526 527 528 529 530 531 532
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);

533 534 535

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
536
		prefetch(next->xstate);
537

L
Linus Torvalds 已提交
538
	/*
Z
Zachary Amsden 已提交
539
	 * Reload esp0.
L
Linus Torvalds 已提交
540
	 */
541
	load_sp0(tss, next);
L
Linus Torvalds 已提交
542 543

	/*
544
	 * Save away %gs. No need to save %fs, as it was saved on the
545 546 547 548 549 550 551
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
L
Linus Torvalds 已提交
552
	 */
553
	lazy_save_gs(prev->gs);
L
Linus Torvalds 已提交
554 555

	/*
Z
Zachary Amsden 已提交
556
	 * Load the per-thread Thread-Local Storage descriptor.
L
Linus Torvalds 已提交
557
	 */
Z
Zachary Amsden 已提交
558
	load_TLS(next, cpu);
L
Linus Torvalds 已提交
559

560 561 562 563 564 565 566 567 568
	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

L
Linus Torvalds 已提交
569
	/*
570
	 * Now maybe handle debug registers and/or IO bitmaps
L
Linus Torvalds 已提交
571
	 */
572 573 574
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);
A
Andrea Arcangeli 已提交
575

576 577 578 579 580 581 582 583 584
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

585 586 587
	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
588 589 590
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
591
	 */
592
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
593 594
		math_state_restore();

595 596 597 598
	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
599
		lazy_load_gs(next->gs);
600

601
	percpu_write(current_task, next_p);
602

L
Linus Torvalds 已提交
603 604 605
	return prev_p;
}

606
int sys_fork(struct pt_regs *regs)
L
Linus Torvalds 已提交
607
{
608
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
609 610
}

611
int sys_clone(struct pt_regs *regs)
L
Linus Torvalds 已提交
612
{
613 614 615 616 617 618 619 620
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

	clone_flags = regs->bx;
	newsp = regs->cx;
	parent_tidptr = (int __user *)regs->dx;
	child_tidptr = (int __user *)regs->di;
L
Linus Torvalds 已提交
621
	if (!newsp)
622 623
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
L
Linus Torvalds 已提交
624 625 626 627 628 629 630 631 632 633 634 635
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
636
int sys_vfork(struct pt_regs *regs)
L
Linus Torvalds 已提交
637
{
638
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
639 640 641 642 643
}

/*
 * sys_execve() executes a new program.
 */
644
int sys_execve(struct pt_regs *regs)
L
Linus Torvalds 已提交
645 646
{
	int error;
647
	char *filename;
L
Linus Torvalds 已提交
648

649
	filename = getname((char __user *) regs->bx);
L
Linus Torvalds 已提交
650 651 652
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
653 654 655 656
	error = do_execve(filename,
			(char __user * __user *) regs->cx,
			(char __user * __user *) regs->dx,
			regs);
L
Linus Torvalds 已提交
657 658 659 660 661 662 663 664 665 666 667 668 669 670
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

/*
 * Largest offsets from the start of a task's stack page that get_wchan()
 * accepts for a saved stack pointer (top_esp) and for a frame pointer
 * (top_ebp).
 */
#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
671
	unsigned long bp, sp, ip;
L
Linus Torvalds 已提交
672 673 674 675
	unsigned long stack_page;
	int count = 0;
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
676
	stack_page = (unsigned long)task_stack_page(p);
677
	sp = p->thread.sp;
678
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
L
Linus Torvalds 已提交
679
		return 0;
680 681
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
L
Linus Torvalds 已提交
682
	do {
683
		if (bp < stack_page || bp > top_ebp+stack_page)
L
Linus Torvalds 已提交
684
			return 0;
685 686 687 688
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
L
Linus Torvalds 已提交
689 690 691 692 693 694
	} while (count++ < 16);
	return 0;
}

unsigned long arch_align_stack(unsigned long sp)
{
695
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
696 697 698
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
J
Jiri Kosina 已提交
699 700 701 702 703 704

/*
 * Ask randomize_range() for an address in the 0x02000000 bytes starting
 * at @mm->brk; if it returns 0, return @mm->brk itself.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	unsigned long new_brk = randomize_range(mm->brk, range_end, 0);

	if (!new_brk)
		new_brk = mm->brk;

	return new_brk;
}