/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

static int hlt_counter;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
	return ((unsigned long *)tsk->thread.sp)[3];
}

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

void disable_hlt(void)
{
	hlt_counter++;
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	hlt_counter--;
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();

		if (!need_resched())
			safe_halt();	/* enables interrupts racelessly */
		else
			local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
	} else {
		local_irq_enable();
		/* loop is done by the caller */
		cpu_relax();
	}
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	/* This must be done before dead CPU ack */
	cpu_exit_clear();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	current_thread_info()->status |= TS_POLLING;

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {
			void (*idle)(void);

			check_pgt_cache();
			rmb();
			idle = pm_idle;

			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

			if (!idle)
				idle = default_idle;

			if (cpu_is_offline(cpu))
				play_dead();

			local_irq_disable();
			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
			idle();
		}
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

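/*
 * Dump the register state in "regs"; when "all" is non-zero the control
 * and debug registers are printed as well.
 */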
void __show_registers(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned long sp;
	unsigned short ss, gs;

	if (user_mode_vm(regs)) {
		sp = regs->sp;
		ss = regs->ss & 0xffff;
		savesegment(gs, gs);
	} else {
		sp = (unsigned long) (&regs->sp);
		savesegment(ss, ss);
		savesegment(gs, gs);
	}

	printk("\n");
	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
			task_pid_nr(current), current->comm,
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
			(u16)regs->cs, regs->ip, regs->flags,
			smp_processor_id());
	print_symbol("EIP is at %s\n", regs->ip);

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
		regs->ax, regs->bx, regs->cx, regs->dx);
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
		regs->si, regs->di, regs->bp, sp);
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4_safe();
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);

	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	__show_registers(regs, 1);
	show_trace(NULL, regs, &regs->sp, regs->bp);
}

/*
 * This gets run with %bx containing the
 * function to call, and %dx containing
 * the "args".
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

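	/*
	 * Build the register frame for the new task: kernel_thread_helper()
	 * picks the function pointer out of %bx and its argument out of %dx
	 * and calls fn(arg) once the child starts running.
	 */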
	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;

	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

	/* Ok, create the new process.. */
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		put_cpu();
	}
}

void flush_thread(void)
{
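	/*
	 * Reset per-thread state on exec: hardware debug registers, TLS
	 * entries and any lazily saved FPU state are all cleared below.
	 */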
	struct task_struct *tsk = current;

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	BUG_ON(dead_task->mm);
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *tsk;
	int err;

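	/*
	 * The child gets a copy of the parent's user-mode registers, with
	 * %ax forced to 0 so that fork()/clone() returns 0 in the child;
	 * the new task starts executing at ret_from_fork.
	 */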
	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	p->thread.ip = (unsigned long) ret_from_fork;

	savesegment(gs, p->thread.gs);

	tsk = current;
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
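	/*
	 * Set up the user-mode register image for a freshly exec'ed task:
	 * %gs and %fs are cleared, flat user code/data segments are loaded,
	 * and execution will resume at new_ip with the stack at new_sp.
	 */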
	__asm__("movl %0, %%gs" :: "r"(0));
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
{
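	/*
	 * Slow path of the context switch: only reached when the outgoing or
	 * incoming task needs extra work such as debug registers, branch
	 * trace MSRs, TSC masking or an I/O bitmap update.
	 */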
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
		/* no 4 and 5 */
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif


	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		return;
	}

	if (likely(next == tss->io_bitmap_owner)) {
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we don't have to do anything but
		 * to set a valid offset in the TSS:
		 */
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and let the task get a GPF in case an I/O instruction
	 * is performed.  The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, if true, does the
	 * real copy and restarts the instruction.  This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);


	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	savesegment(gs, prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/* If the task has used the FPU during the last 5 timeslices, just do
	 * a full restore of the math state immediately to avoid the trap; the
	 * chances of needing the FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

	x86_write_percpu(current_task, next_p);

	return prev_p;
}

asmlinkage int sys_fork(struct pt_regs regs)
{
	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

	clone_flags = regs.bx;
	newsp = regs.cx;
	parent_tidptr = (int __user *)regs.dx;
	child_tidptr = (int __user *)regs.di;
	if (!newsp)
		newsp = regs.sp;
	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage int sys_vfork(struct pt_regs regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
	char * filename;

	filename = getname((char __user *) regs.bx);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename,
			(char __user * __user *) regs.cx,
			(char __user * __user *) regs.dx,
			&regs);
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
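	/*
	 * Walk the sleeping task's frame-pointer chain and return the first
	 * return address that is not inside the scheduler; 0 means unknown.
	 */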
	unsigned long bp, sp, ip;
	unsigned long stack_page;
	int count = 0;
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack_page = (unsigned long)task_stack_page(p);
	sp = p->thread.sp;
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
		return 0;
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
	do {
		if (bp < stack_page || bp > top_ebp+stack_page)
			return 0;
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
	} while (count++ < 16);
	return 0;
}

unsigned long arch_align_stack(unsigned long sp)
{
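	/* Randomize the initial stack pointer by up to 8k, 16-byte aligned. */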
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}