/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

static int hlt_counter;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);

/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
	return ((unsigned long *)tsk->thread.sp)[3];
}

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

void disable_hlt(void)
{
	hlt_counter++;
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	hlt_counter--;
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
		 * test NEED_RESCHED:
		 */
		smp_mb();

		local_irq_disable();
		if (!need_resched()) {
			safe_halt();	/* enables interrupts racelessly */
			local_irq_disable();
		}
		local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
	} else {
		local_irq_enable();
		/* loop is done by the caller */
		cpu_relax();
	}
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->work.need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	/* This must be done before dead CPU ack */
	cpu_exit_clear();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	current_thread_info()->status |= TS_POLLING;

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
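		/* NO_HZ: switch off the periodic tick while idle; it is restarted below, right before schedule(). */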
		while (!need_resched()) {
			void (*idle)(void);

			check_pgt_cache();
			rmb();
			idle = pm_idle;

			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

			if (!idle)
				idle = default_idle;

			if (cpu_is_offline(cpu))
				play_dead();

			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
			idle();
		}
		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

static void do_nothing(void *unused)
{
}

/*
 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
 * pm_idle and update to new pm_idle value. Required while changing pm_idle
 * handler on SMP systems.
 *
 * Caller must have changed pm_idle to the new value before the call. Old
 * pm_idle value will not be used by any CPU after the return of this function.
 */
void cpu_idle_wait(void)
{
	smp_mb();
	/* kick all the CPUs so that they exit out of pm_idle */
	smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(ax, cx);
		else
			local_irq_enable();
	} else
		local_irq_enable();
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	local_irq_enable();
	mwait_idle_with_hints(0, 0);
}

static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
	if (force_mwait)
		return 1;
	/* Any C1 states supported? */
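	/* CPUID leaf 5: EDX[7:4] is the number of MWAIT C1 sub-states. */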
	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int selected;

	if (selected)
		return;
#ifdef CONFIG_X86_SMP
	if (pm_idle == poll_idle && smp_num_siblings > 1) {
		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
			" performance may degrade.\n");
	}
#endif
	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs supports mwait
		 */
		if (!pm_idle) {
			printk(KERN_INFO "using mwait in idle threads.\n");
			pm_idle = mwait_idle;
		}
	}
	selected = 1;
}
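
/*
 * Boot-time override: the "idle=" kernel parameter ("idle=poll" or
 * "idle=mwait") is parsed below and takes precedence over the
 * automatic selection above.
 */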

static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

void __show_registers(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned long sp;
	unsigned short ss, gs;

	if (user_mode_vm(regs)) {
		sp = regs->sp;
		ss = regs->ss & 0xffff;
		savesegment(gs, gs);
	} else {
		sp = (unsigned long) (&regs->sp);
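		/*
		 * Same-privilege traps do not push %sp/%ss, so the pre-trap
		 * stack pointer is the address where they would have been.
		 */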
		savesegment(ss, ss);
		savesegment(gs, gs);
	}

	printk("\n");
	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
			task_pid_nr(current), current->comm,
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
			(u16)regs->cs, regs->ip, regs->flags,
			smp_processor_id());
	print_symbol("EIP is at %s\n", regs->ip);

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
		regs->ax, regs->bx, regs->cx, regs->dx);
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
		regs->si, regs->di, regs->bp, sp);
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4_safe();
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);

	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	__show_registers(regs, 1);
	show_trace(NULL, regs, &regs->sp, regs->bp);
}

/*
 * This gets run with %bx containing the
 * function to call, and %dx containing
 * the "args".
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;

	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
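	/* 0x2 is the reserved, always-set EFLAGS bit; IF lets the new thread start with interrupts enabled. */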

	/* Ok, create the new process.. */
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
}
EXPORT_SYMBOL(kernel_thread);
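
/*
 * Illustrative use only (hypothetical caller and names):
 *
 *	static int my_worker(void *data)
 *	{
 *		do_work(data);
 *		return 0;
 *	}
 *
 *	kernel_thread(my_worker, data, CLONE_FS | CLONE_FILES);
 */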

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	BUG_ON(dead_task->mm);
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;
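	/* The child sees a return value of 0 from fork()/clone(); its stack starts at the caller-supplied sp. */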

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	p->thread.ip = (unsigned long) ret_from_fork;

	savesegment(gs, p->thread.gs);

	tsk = current;
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	__asm__("movl %0, %%gs" :: "r"(0));
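	/* Clear %gs (and %fs below) so no stale TLS selector survives into the freshly exec'ed program. */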
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
		/* no 4 and 5 */
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif


	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
		return;
	}

L
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we dont have to do anything but
		 * to set a valid offset in the TSS:
		 */
650
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and we let the task get a GPF in case an I/O instruction
	 * is performed.  The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, if true, does the
	 * real copy and restarts the instruction.  This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);


	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	savesegment(gs, prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

L
	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);
A
746 747 748 749 750 751 752 753 754
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

	x86_write_percpu(current_task, next_p);

	return prev_p;
}

asmlinkage int sys_fork(struct pt_regs regs)
{
	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

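	/*
	 * i386 ABI: the clone() arguments were passed in registers and are
	 * read back from the saved pt_regs (%ebx, %ecx, %edx, %edi); a
	 * CLONE_SETTLS descriptor pointer is picked up from %esi later,
	 * in copy_thread().
	 */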
	clone_flags = regs.bx;
	newsp = regs.cx;
	parent_tidptr = (int __user *)regs.dx;
	child_tidptr = (int __user *)regs.di;
	if (!newsp)
		newsp = regs.sp;
	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage int sys_vfork(struct pt_regs regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
	char * filename;

	filename = getname((char __user *) regs.bx);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename,
			(char __user * __user *) regs.cx,
			(char __user * __user *) regs.dx,
			&regs);
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long bp, sp, ip;
	unsigned long stack_page;
	int count = 0;
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack_page = (unsigned long)task_stack_page(p);
	sp = p->thread.sp;
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
		return 0;
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
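	/*
	 * Standard frame-pointer walk: [bp] holds the caller's saved %ebp
	 * and [bp+4] its return address, so we step outward until we find
	 * a PC outside the scheduler.
	 */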
	do {
		if (bp < stack_page || bp > top_ebp+stack_page)
			return 0;
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
	} while (count++ < 16);
	return 0;
}

unsigned long arch_align_stack(unsigned long sp)
{
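	/* Unless randomization is disabled, nudge the initial stack down by up to 8KB, keeping 16-byte alignment. */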
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}