process_32.c 17.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

Z
Zwane Mwaikambo 已提交
14
#include <linux/cpu.h>
L
Linus Torvalds 已提交
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
36
#include <linux/personality.h>
I
Ingo Molnar 已提交
37
#include <linux/tick.h>
38
#include <linux/percpu.h>
39
#include <linux/prctl.h>
L
Linus Torvalds 已提交
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

Z
Zwane Mwaikambo 已提交
55 56
#include <asm/tlbflush.h>
#include <asm/cpu.h>
57
#include <asm/kdebug.h>
Z
Zwane Mwaikambo 已提交
58

L
Linus Torvalds 已提交
59 60
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

61 62 63 64 65 66
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

L
Linus Torvalds 已提交
67 68 69 70 71
/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
72
	return ((unsigned long *)tsk->thread.sp)[3];
L
Linus Torvalds 已提交
73 74
}

Z
Zwane Mwaikambo 已提交
75 76
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
L
Linus Torvalds 已提交
77

78
static void cpu_exit_clear(void)
L
Linus Torvalds 已提交
79
{
80
	int cpu = raw_smp_processor_id();
L
Linus Torvalds 已提交
81

82
	idle_task_exit();
L
Linus Torvalds 已提交
83

84 85
	cpu_uninit();
	irq_ctx_exit(cpu);
L
Linus Torvalds 已提交
86

87 88
	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);
L
Linus Torvalds 已提交
89

90
	numa_remove_cpu(cpu);
L
Linus Torvalds 已提交
91 92
}

Z
Zwane Mwaikambo 已提交
93 94 95
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
96 97 98 99
	/* This must be done before dead CPU ack */
	cpu_exit_clear();
	wbinvd();
	mb();
Z
Zwane Mwaikambo 已提交
100 101 102
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

103 104 105
	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
Z
Zwane Mwaikambo 已提交
106
	local_irq_disable();
107
	while (1)
Z
Zachary Amsden 已提交
108
		halt();
Z
Zwane Mwaikambo 已提交
109 110 111 112 113 114 115 116
}
#else
/*
 * Variant built when CONFIG_HOTPLUG_CPU is not set: reaching this
 * function is treated as a kernel bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

L
Linus Torvalds 已提交
117 118 119 120 121 122
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
Z
Zwane Mwaikambo 已提交
123
void cpu_idle(void)
L
Linus Torvalds 已提交
124
{
125
	int cpu = smp_processor_id();
Z
Zwane Mwaikambo 已提交
126

127
	current_thread_info()->status |= TS_POLLING;
128

L
Linus Torvalds 已提交
129 130
	/* endless idle loop with no priority at all */
	while (1) {
131
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
132 133
		while (!need_resched()) {

C
Christoph Lameter 已提交
134
			check_pgt_cache();
L
Linus Torvalds 已提交
135 136
			rmb();

137 138 139
			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

Z
Zwane Mwaikambo 已提交
140 141 142
			if (cpu_is_offline(cpu))
				play_dead();

143
			local_irq_disable();
L
Linus Torvalds 已提交
144
			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
145 146
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
147
			pm_idle();
148
			start_critical_timings();
L
Linus Torvalds 已提交
149
		}
I
Ingo Molnar 已提交
150
		tick_nohz_restart_sched_tick();
151
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
152
		schedule();
153
		preempt_disable();
L
Linus Torvalds 已提交
154 155 156
	}
}

157
void __show_registers(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
158 159
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
160
	unsigned long d0, d1, d2, d3, d6, d7;
161
	unsigned long sp;
162 163 164
	unsigned short ss, gs;

	if (user_mode_vm(regs)) {
165 166
		sp = regs->sp;
		ss = regs->ss & 0xffff;
167 168
		savesegment(gs, gs);
	} else {
169
		sp = (unsigned long) (&regs->sp);
170 171 172
		savesegment(ss, ss);
		savesegment(gs, gs);
	}
L
Linus Torvalds 已提交
173 174

	printk("\n");
175 176
	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
			task_pid_nr(current), current->comm,
177 178 179 180 181
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
182
			(u16)regs->cs, regs->ip, regs->flags,
183
			smp_processor_id());
184
	print_symbol("EIP is at %s\n", regs->ip);
L
Linus Torvalds 已提交
185 186

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
187
		regs->ax, regs->bx, regs->cx, regs->dx);
188
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
189
		regs->si, regs->di, regs->bp, sp);
190
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
191
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
192 193 194

	if (!all)
		return;
L
Linus Torvalds 已提交
195

196 197 198
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
199
	cr4 = read_cr4_safe();
200 201
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);
202 203 204 205 206 207 208

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);
209

210 211
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
212 213 214
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}
215

216 217 218
void show_regs(struct pt_regs *regs)
{
	__show_registers(regs, 1);
219
	show_trace(NULL, regs, &regs->sp, regs->bp);
L
Linus Torvalds 已提交
220 221 222
}

/*
223 224
 * This gets run with %bx containing the
 * function to call, and %dx containing
L
Linus Torvalds 已提交
225 226 227 228 229 230 231 232 233 234 235 236 237
 * the "args".
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

238 239
	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;
L
Linus Torvalds 已提交
240

241 242 243 244 245 246 247
	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
L
Linus Torvalds 已提交
248 249

	/* Ok, create the new process.. */
250
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
251
}
252
EXPORT_SYMBOL(kernel_thread);
L
Linus Torvalds 已提交
253 254 255 256 257 258 259

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
260 261 262
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
L
Linus Torvalds 已提交
263 264 265 266 267
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
268
		clear_thread_flag(TIF_IO_BITMAP);
L
Linus Torvalds 已提交
269 270 271 272 273 274 275
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
276
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
277 278 279 280 281 282 283 284
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

285 286 287 288 289 290
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
L
Linus Torvalds 已提交
291
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
292
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
L
Linus Torvalds 已提交
293 294 295
	/*
	 * Forget coprocessor state..
	 */
296
	tsk->fpu_counter = 0;
L
Linus Torvalds 已提交
297 298 299 300 301 302
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
303
	BUG_ON(dead_task->mm);
L
Linus Torvalds 已提交
304 305 306 307 308 309 310 311 312 313 314 315
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * The only work done here is passing @tsk to unlazy_fpu().
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

316
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
317 318 319 320 321 322 323
	unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *tsk;
	int err;

A
akpm@osdl.org 已提交
324
	childregs = task_pt_regs(p);
325
	*childregs = *regs;
326 327
	childregs->ax = 0;
	childregs->sp = sp;
328

329 330
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
L
Linus Torvalds 已提交
331

332
	p->thread.ip = (unsigned long) ret_from_fork;
L
Linus Torvalds 已提交
333

334
	savesegment(gs, p->thread.gs);
L
Linus Torvalds 已提交
335 336

	tsk = current;
337
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
A
Alexey Dobriyan 已提交
338 339
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
L
Linus Torvalds 已提交
340 341 342 343
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
344
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
L
Linus Torvalds 已提交
345 346
	}

R
Roland McGrath 已提交
347 348
	err = 0;

L
Linus Torvalds 已提交
349 350 351
	/*
	 * Set a new TLS for the child thread?
	 */
R
Roland McGrath 已提交
352 353
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
354
			(struct user_desc __user *)childregs->si, 0);
L
Linus Torvalds 已提交
355 356 357 358 359 360 361 362

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

I
Ingo Molnar 已提交
363 364 365 366 367 368 369 370 371 372 373 374
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	__asm__("movl %0, %%gs" :: "r"(0));
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
375 376 377 378
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
379 380 381
}
EXPORT_SYMBOL_GPL(start_thread);

382
static void hard_disable_TSC(void)
383 384 385
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
386

387 388 389 390 391 392 393 394 395 396 397
/*
 * Set TIF_NOTSC on the current thread and, if it was not already set,
 * update CR4 to match. Runs with preemption disabled.
 */
void disable_TSC(void)
{
	int already_set;

	preempt_disable();
	already_set = test_and_set_thread_flag(TIF_NOTSC);
	if (!already_set) {
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	}
	preempt_enable();
}
398

399
static void hard_enable_TSC(void)
400 401 402
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
403

I
Ingo Molnar 已提交
404
static void enable_TSC(void)
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

/*
 * Report the current thread's TSC mode to user space.
 *
 * @adr: user-space address of an unsigned int that receives
 *       PR_TSC_SIGSEGV when TIF_NOTSC is set, PR_TSC_ENABLE otherwise.
 *
 * Returns the result of put_user().
 */
int get_tsc_mode(unsigned long adr)
{
	unsigned int mode = test_thread_flag(TIF_NOTSC) ?
				PR_TSC_SIGSEGV : PR_TSC_ENABLE;

	return put_user(mode, (unsigned int __user *)adr);
}

/*
 * Select the current thread's TSC mode.
 *
 * @val: PR_TSC_SIGSEGV or PR_TSC_ENABLE.
 *
 * Returns 0 on success, -EINVAL for any other value.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		break;
	case PR_TSC_ENABLE:
		enable_TSC();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
439 440 441 442

static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
L
Linus Torvalds 已提交
443
{
R
Roland McGrath 已提交
444
	struct thread_struct *prev, *next;
445
	unsigned long debugctl;
446

R
Roland McGrath 已提交
447
	prev = &prev_p->thread;
448 449
	next = &next_p->thread;

450 451 452 453 454
	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
455
		update_debugctlmsr(0);
456 457 458 459
		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
	}

	if (next->debugctlmsr != debugctl)
460
		update_debugctlmsr(next->debugctlmsr);
R
Roland McGrath 已提交
461

462
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
463 464 465 466
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
467
		/* no 4 and 5 */
468 469
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
470 471
	}

472 473 474 475 476 477 478 479 480
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

481
#ifdef X86_BTS
482 483 484 485 486
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
487
#endif
488 489


490
	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
L
Linus Torvalds 已提交
491 492 493 494
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
495
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
496 497
		return;
	}
498

L
Linus Torvalds 已提交
499 500 501 502 503 504
	if (likely(next == tss->io_bitmap_owner)) {
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we dont have to do anything but
		 * to set a valid offset in the TSS:
		 */
505
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
506 507 508 509 510 511 512 513 514 515 516
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and we let the task to get a GPF in case an I/O instruction
	 * is performed.  The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, it true, does the
	 * real copy and restart the instruction.  This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
517
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
L
Linus Torvalds 已提交
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
543
 * The return value (in %ax) will be the "prev" task after
L
Linus Torvalds 已提交
544 545 546
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
547
struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
548 549 550 551 552 553 554 555 556 557
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);

558 559 560

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
561
		prefetch(next->xstate);
562

L
Linus Torvalds 已提交
563
	/*
Z
Zachary Amsden 已提交
564
	 * Reload esp0.
L
Linus Torvalds 已提交
565
	 */
566
	load_sp0(tss, next);
L
Linus Torvalds 已提交
567 568

	/*
569
	 * Save away %gs. No need to save %fs, as it was saved on the
570 571 572 573 574 575 576
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
L
Linus Torvalds 已提交
577
	 */
578
	savesegment(gs, prev->gs);
L
Linus Torvalds 已提交
579 580

	/*
Z
Zachary Amsden 已提交
581
	 * Load the per-thread Thread-Local Storage descriptor.
L
Linus Torvalds 已提交
582
	 */
Z
Zachary Amsden 已提交
583
	load_TLS(next, cpu);
L
Linus Torvalds 已提交
584

585 586 587 588 589 590 591 592 593
	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

L
Linus Torvalds 已提交
594
	/*
595
	 * Now maybe handle debug registers and/or IO bitmaps
L
Linus Torvalds 已提交
596
	 */
597 598 599
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);
A
Andrea Arcangeli 已提交
600

601 602 603 604 605 606 607 608 609
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

610 611 612
	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
613 614 615
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
616
	 */
617
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
618 619
		math_state_restore();

620 621 622 623 624 625
	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

626
	x86_write_percpu(current_task, next_p);
627

L
Linus Torvalds 已提交
628 629 630 631 632
	return prev_p;
}

asmlinkage int sys_fork(struct pt_regs regs)
{
633
	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
634 635 636 637 638 639 640 641
}

/*
 * clone system call. Arguments are taken from the register frame:
 * bx = clone flags, cx = new stack pointer, dx = parent tid pointer,
 * di = child tid pointer. A zero stack pointer means "use the caller's".
 * Returns the result of do_fork().
 */
asmlinkage int sys_clone(struct pt_regs regs)
{
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

	clone_flags = regs.bx;
	newsp = regs.cx;
	parent_tidptr = (int __user *)regs.dx;
	child_tidptr = (int __user *)regs.di;
	if (!newsp)
		newsp = regs.sp;
	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage int sys_vfork(struct pt_regs regs)
{
663
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
664 665 666 667 668 669 670 671 672 673
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
	char * filename;

674
	filename = getname((char __user *) regs.bx);
L
Linus Torvalds 已提交
675 676 677 678
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename,
679 680
			(char __user * __user *) regs.cx,
			(char __user * __user *) regs.dx,
L
Linus Torvalds 已提交
681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
			&regs);
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
696
	unsigned long bp, sp, ip;
L
Linus Torvalds 已提交
697 698 699 700
	unsigned long stack_page;
	int count = 0;
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
701
	stack_page = (unsigned long)task_stack_page(p);
702
	sp = p->thread.sp;
703
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
L
Linus Torvalds 已提交
704
		return 0;
705 706
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
L
Linus Torvalds 已提交
707
	do {
708
		if (bp < stack_page || bp > top_ebp+stack_page)
L
Linus Torvalds 已提交
709
			return 0;
710 711 712 713
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
L
Linus Torvalds 已提交
714 715 716 717 718 719
	} while (count++ < 16);
	return 0;
}

unsigned long arch_align_stack(unsigned long sp)
{
720
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
721 722 723
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
J
Jiri Kosina 已提交
724 725 726 727 728 729

/*
 * Pick a randomized address in the 0x02000000-byte window starting at
 * mm->brk. Falls back to mm->brk when randomize_range() returns 0.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long window_end = mm->brk + 0x02000000;
	unsigned long picked = randomize_range(mm->brk, window_end, 0);

	if (picked)
		return picked;
	return mm->brk;
}