process_32.c 17.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

Z
Zwane Mwaikambo 已提交
14
#include <linux/cpu.h>
L
Linus Torvalds 已提交
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/mc146818rtc.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/random.h>
36
#include <linux/personality.h>
I
Ingo Molnar 已提交
37
#include <linux/tick.h>
38
#include <linux/percpu.h>
39
#include <linux/prctl.h>
40
#include <linux/dmi.h>
41
#include <linux/ftrace.h>
42 43 44
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
L
Linus Torvalds 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include <linux/err.h>

Z
Zwane Mwaikambo 已提交
58 59
#include <asm/tlbflush.h>
#include <asm/cpu.h>
60
#include <asm/idle.h>
61
#include <asm/syscalls.h>
62
#include <asm/ds.h>
Z
Zwane Mwaikambo 已提交
63

L
Linus Torvalds 已提交
64 65
/*
 * Declared here so copy_thread() can store its address as the saved
 * instruction pointer (thread.ip) of a new task.  The __asm__ label fixes
 * the symbol name to exactly "ret_from_fork".
 */
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

66 67 68
/*
 * Per-CPU task pointer, initialised to &init_task.  __switch_to() stores
 * the incoming task here via percpu_write() on every switch.
 */
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

L
Linus Torvalds 已提交
69 70 71 72 73
/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
74
	return ((unsigned long *)tsk->thread.sp)[3];
L
Linus Torvalds 已提交
75 76
}

A
Alex Nixon 已提交
77 78 79 80 81 82 83
#if !defined(CONFIG_SMP)
/*
 * Uniprocessor stand-in for play_dead().  cpu_idle() calls it only when
 * cpu_is_offline() reports true, and reaching it here is treated as a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif /* !CONFIG_SMP */

L
Linus Torvalds 已提交
84 85 86 87 88 89
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
Z
Zwane Mwaikambo 已提交
90
void cpu_idle(void)
L
Linus Torvalds 已提交
91
{
92
	int cpu = smp_processor_id();
Z
Zwane Mwaikambo 已提交
93

94
	current_thread_info()->status |= TS_POLLING;
95

L
Linus Torvalds 已提交
96 97
	/* endless idle loop with no priority at all */
	while (1) {
98
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
99 100
		while (!need_resched()) {

C
Christoph Lameter 已提交
101
			check_pgt_cache();
L
Linus Torvalds 已提交
102 103
			rmb();

104 105 106
			if (rcu_pending(cpu))
				rcu_check_callbacks(cpu, 0);

Z
Zwane Mwaikambo 已提交
107 108 109
			if (cpu_is_offline(cpu))
				play_dead();

110
			local_irq_disable();
111 112
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
113
			pm_idle();
114
			start_critical_timings();
L
Linus Torvalds 已提交
115
		}
I
Ingo Molnar 已提交
116
		tick_nohz_restart_sched_tick();
117
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
118
		schedule();
119
		preempt_disable();
L
Linus Torvalds 已提交
120 121 122
	}
}

123
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
124 125
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
126
	unsigned long d0, d1, d2, d3, d6, d7;
127
	unsigned long sp;
128
	unsigned short ss, gs;
129
	const char *board;
130 131

	if (user_mode_vm(regs)) {
132 133
		sp = regs->sp;
		ss = regs->ss & 0xffff;
T
Tejun Heo 已提交
134
		gs = get_user_gs(regs);
135
	} else {
136
		sp = (unsigned long) (&regs->sp);
137 138 139
		savesegment(ss, ss);
		savesegment(gs, gs);
	}
L
Linus Torvalds 已提交
140 141

	printk("\n");
142 143 144 145 146

	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
147
			task_pid_nr(current), current->comm,
148 149
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
150
			init_utsname()->version, board);
151 152

	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
153
			(u16)regs->cs, regs->ip, regs->flags,
154
			smp_processor_id());
155
	print_symbol("EIP is at %s\n", regs->ip);
L
Linus Torvalds 已提交
156 157

	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
158
		regs->ax, regs->bx, regs->cx, regs->dx);
159
	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
160
		regs->si, regs->di, regs->bp, sp);
161
	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
162
	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
163 164 165

	if (!all)
		return;
L
Linus Torvalds 已提交
166

167 168 169
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
170
	cr4 = read_cr4_safe();
171 172
	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);
173 174 175 176 177 178 179

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
			d0, d1, d2, d3);
180

181 182
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
183 184 185
	printk("DR6: %08lx DR7: %08lx\n",
			d6, d7);
}
186

187 188
void show_regs(struct pt_regs *regs)
{
189
	__show_regs(regs, 1);
190
	show_trace(NULL, regs, &regs->sp, regs->bp);
L
Linus Torvalds 已提交
191 192 193
}

/*
 * This gets run with %bx containing the
 * function to call, and %dx containing
 * the "args".
 */
extern void kernel_thread_helper(void);

/*
 * Create a kernel thread
 */
203
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
L
Linus Torvalds 已提交
204 205 206 207 208
{
	struct pt_regs regs;

	memset(&regs, 0, sizeof(regs));

209 210
	regs.bx = (unsigned long) fn;
	regs.dx = (unsigned long) arg;
L
Linus Torvalds 已提交
211

212 213 214 215 216 217 218
	regs.ds = __USER_DS;
	regs.es = __USER_DS;
	regs.fs = __KERNEL_PERCPU;
	regs.orig_ax = -1;
	regs.ip = (unsigned long) kernel_thread_helper;
	regs.cs = __KERNEL_CS | get_kernel_rpl();
	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
L
Linus Torvalds 已提交
219 220

	/* Ok, create the new process.. */
221
	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
222
}
223
EXPORT_SYMBOL(kernel_thread);
L
Linus Torvalds 已提交
224 225 226 227 228 229 230

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* The process may have allocated an io port bitmap... nuke it. */
231 232 233
	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
		struct task_struct *tsk = current;
		struct thread_struct *t = &tsk->thread;
L
Linus Torvalds 已提交
234 235 236 237 238
		int cpu = get_cpu();
		struct tss_struct *tss = &per_cpu(init_tss, cpu);

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
239
		clear_thread_flag(TIF_IO_BITMAP);
L
Linus Torvalds 已提交
240 241 242 243 244 245 246
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
		t->io_bitmap_max = 0;
		tss->io_bitmap_owner = NULL;
		tss->io_bitmap_max = 0;
247
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
248 249
		put_cpu();
	}
250 251

	ds_exit_thread(current);
L
Linus Torvalds 已提交
252 253 254 255 256 257
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

258 259 260 261 262 263
	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
264
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
265
	clear_tsk_thread_flag(tsk, TIF_DEBUG);
L
Linus Torvalds 已提交
266 267 268
	/*
	 * Forget coprocessor state..
	 */
269
	tsk->fpu_counter = 0;
L
Linus Torvalds 已提交
270 271 272 273 274 275
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
276
	BUG_ON(dead_task->mm);
L
Linus Torvalds 已提交
277 278 279 280 281 282 283 284 285 286 287 288
	release_vm86_irqs(dead_task);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * The only work done here is handing @tsk to unlazy_fpu().
 * NOTE(review): presumably this writes any live FPU state back into @tsk
 * so that the copy sees current values -- confirm against unlazy_fpu().
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

289
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
290
	unsigned long unused,
291
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
292
{
293
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
294 295 296
	struct task_struct *tsk;
	int err;

A
akpm@osdl.org 已提交
297
	childregs = task_pt_regs(p);
298
	*childregs = *regs;
299 300
	childregs->ax = 0;
	childregs->sp = sp;
301

302 303
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
L
Linus Torvalds 已提交
304

305
	p->thread.ip = (unsigned long) ret_from_fork;
L
Linus Torvalds 已提交
306

T
Tejun Heo 已提交
307
	task_user_gs(p) = get_user_gs(regs);
L
Linus Torvalds 已提交
308 309

	tsk = current;
310
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
A
Alexey Dobriyan 已提交
311 312
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
L
Linus Torvalds 已提交
313 314 315 316
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
317
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
L
Linus Torvalds 已提交
318 319
	}

R
Roland McGrath 已提交
320 321
	err = 0;

L
Linus Torvalds 已提交
322 323 324
	/*
	 * Set a new TLS for the child thread?
	 */
R
Roland McGrath 已提交
325 326
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
327
			(struct user_desc __user *)childregs->si, 0);
L
Linus Torvalds 已提交
328 329 330 331 332

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
333 334 335 336 337 338

	ds_copy_thread(p, current);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
339 340 341
	return err;
}

I
Ingo Molnar 已提交
342 343 344
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
T
Tejun Heo 已提交
345
	set_user_gs(regs, 0);
I
Ingo Molnar 已提交
346 347 348 349 350 351 352 353
	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
354 355 356 357
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
358 359 360
}
EXPORT_SYMBOL_GPL(start_thread);

361
static void hard_disable_TSC(void)
362 363 364
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}
365

366 367 368 369 370 371 372 373 374 375 376
/*
 * Mark the current thread TIF_NOTSC.  The CR4 bit is only touched when the
 * flag was not already set, and the whole sequence runs with preemption
 * disabled.
 */
void disable_TSC(void)
{
	int already_set;

	preempt_disable();
	already_set = test_and_set_thread_flag(TIF_NOTSC);
	if (!already_set) {
		/*
		 * The CPU state has to change together with TIF_NOTSC,
		 * in the context that is running right now.
		 */
		hard_disable_TSC();
	}
	preempt_enable();
}
377

378
static void hard_enable_TSC(void)
379 380 381
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}
382

I
Ingo Molnar 已提交
383
static void enable_TSC(void)
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

/*
 * Select the current thread's TSC mode.
 *
 * @val: PR_TSC_SIGSEGV to disable the TSC, PR_TSC_ENABLE to enable it.
 *
 * Returns 0 on success or -EINVAL for any other value.
 */
int set_tsc_mode(unsigned int val)
{
	switch (val) {
	case PR_TSC_SIGSEGV:
		disable_TSC();
		return 0;
	case PR_TSC_ENABLE:
		enable_TSC();
		return 0;
	default:
		return -EINVAL;
	}
}
418 419 420 421

static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		 struct tss_struct *tss)
L
Linus Torvalds 已提交
422
{
R
Roland McGrath 已提交
423
	struct thread_struct *prev, *next;
424

R
Roland McGrath 已提交
425
	prev = &prev_p->thread;
426 427
	next = &next_p->thread;

428 429 430 431
	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
432
		update_debugctlmsr(next->debugctlmsr);
R
Roland McGrath 已提交
433

434
	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
435 436 437 438
		set_debugreg(next->debugreg0, 0);
		set_debugreg(next->debugreg1, 1);
		set_debugreg(next->debugreg2, 2);
		set_debugreg(next->debugreg3, 3);
439
		/* no 4 and 5 */
440 441
		set_debugreg(next->debugreg6, 6);
		set_debugreg(next->debugreg7, 7);
442 443
	}

444 445 446 447 448 449 450 451 452
	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

453
	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
L
Linus Torvalds 已提交
454 455 456 457
		/*
		 * Disable the bitmap via an invalid offset. We still cache
		 * the previous bitmap owner and the IO bitmap contents:
		 */
458
		tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
459 460
		return;
	}
461

L
Linus Torvalds 已提交
462 463 464 465 466 467
	if (likely(next == tss->io_bitmap_owner)) {
		/*
		 * Previous owner of the bitmap (hence the bitmap content)
		 * matches the next task, we dont have to do anything but
		 * to set a valid offset in the TSS:
		 */
468
		tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
L
Linus Torvalds 已提交
469 470 471 472 473 474 475 476 477 478 479
		return;
	}
	/*
	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
	 * and we let the task to get a GPF in case an I/O instruction
	 * is performed.  The handler of the GPF will verify that the
	 * faulting task has a valid I/O bitmap and, it true, does the
	 * real copy and restart the instruction.  This will save us
	 * redundant copies when the currently switched task does not
	 * perform any I/O during its timeslice.
	 */
480
	tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
L
Linus Torvalds 已提交
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
}

/*
 *	switch_to(x,yn) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPU's, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
506
 * The return value (in %ax) will be the "prev" task after
L
Linus Torvalds 已提交
507 508 509
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
510 511
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
512 513 514 515 516 517 518 519 520 521
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);

522 523 524

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
525
		prefetch(next->xstate);
526

L
Linus Torvalds 已提交
527
	/*
Z
Zachary Amsden 已提交
528
	 * Reload esp0.
L
Linus Torvalds 已提交
529
	 */
530
	load_sp0(tss, next);
L
Linus Torvalds 已提交
531 532

	/*
533
	 * Save away %gs. No need to save %fs, as it was saved on the
534 535 536 537 538 539 540
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
L
Linus Torvalds 已提交
541
	 */
542
	savesegment(gs, prev->gs);
L
Linus Torvalds 已提交
543 544

	/*
Z
Zachary Amsden 已提交
545
	 * Load the per-thread Thread-Local Storage descriptor.
L
Linus Torvalds 已提交
546
	 */
Z
Zachary Amsden 已提交
547
	load_TLS(next, cpu);
L
Linus Torvalds 已提交
548

549 550 551 552 553 554 555 556 557
	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

L
Linus Torvalds 已提交
558
	/*
559
	 * Now maybe handle debug registers and/or IO bitmaps
L
Linus Torvalds 已提交
560
	 */
561 562 563
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);
A
Andrea Arcangeli 已提交
564

565 566 567 568 569 570 571 572 573
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

574 575 576
	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
577 578 579
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
580
	 */
581
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
582 583
		math_state_restore();

584 585 586 587 588 589
	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

590
	percpu_write(current_task, next_p);
591

L
Linus Torvalds 已提交
592 593 594 595 596
	return prev_p;
}

asmlinkage int sys_fork(struct pt_regs regs)
{
597
	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
598 599 600 601 602 603 604 605
}

/*
 * clone() entry point.  Arguments are taken from the saved registers:
 * bx = clone flags, cx = new stack pointer, dx = parent TID pointer,
 * di = child TID pointer.  A zero stack pointer means "reuse the caller's
 * stack pointer".  Returns do_fork()'s result.
 */
asmlinkage int sys_clone(struct pt_regs regs)
{
	unsigned long clone_flags;
	unsigned long newsp;
	int __user *parent_tidptr, *child_tidptr;

	clone_flags = regs.bx;
	newsp = regs.cx;
	parent_tidptr = (int __user *)regs.dx;
	child_tidptr = (int __user *)regs.di;
	if (!newsp)
		newsp = regs.sp;
	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage int sys_vfork(struct pt_regs regs)
{
627
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
L
Linus Torvalds 已提交
628 629 630 631 632 633 634 635
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage int sys_execve(struct pt_regs regs)
{
	int error;
636
	char *filename;
L
Linus Torvalds 已提交
637

638
	filename = getname((char __user *) regs.bx);
L
Linus Torvalds 已提交
639 640 641 642
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		goto out;
	error = do_execve(filename,
643 644
			(char __user * __user *) regs.cx,
			(char __user * __user *) regs.dx,
L
Linus Torvalds 已提交
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
			&regs);
	if (error == 0) {
		/* Make sure we don't return using sysenter.. */
		set_thread_flag(TIF_IRET);
	}
	putname(filename);
out:
	return error;
}

#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))

unsigned long get_wchan(struct task_struct *p)
{
660
	unsigned long bp, sp, ip;
L
Linus Torvalds 已提交
661 662 663 664
	unsigned long stack_page;
	int count = 0;
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
665
	stack_page = (unsigned long)task_stack_page(p);
666
	sp = p->thread.sp;
667
	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
L
Linus Torvalds 已提交
668
		return 0;
669 670
	/* include/asm-i386/system.h:switch_to() pushes bp last. */
	bp = *(unsigned long *) sp;
L
Linus Torvalds 已提交
671
	do {
672
		if (bp < stack_page || bp > top_ebp+stack_page)
L
Linus Torvalds 已提交
673
			return 0;
674 675 676 677
		ip = *(unsigned long *) (bp+4);
		if (!in_sched_functions(ip))
			return ip;
		bp = *(unsigned long *) bp;
L
Linus Torvalds 已提交
678 679 680 681 682 683
	} while (count++ < 16);
	return 0;
}

unsigned long arch_align_stack(unsigned long sp)
{
684
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
685 686 687
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
J
Jiri Kosina 已提交
688 689 690 691 692 693

/*
 * Pick a randomised break address for @mm in the 0x02000000-byte window
 * that starts at mm->brk.  If randomize_range() returns 0, mm->brk itself
 * is returned.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long window_end = mm->brk + 0x02000000;
	unsigned long picked = randomize_range(mm->brk, window_end, 0);

	if (picked)
		return picked;
	return mm->brk;
}