process_64.c 17.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
6
 *
L
Linus Torvalds 已提交
7 8
 *  X86-64 port
 *	Andi Kleen.
A
Ashok Raj 已提交
9 10
 *
 *	CPU hotplug support - ashok.raj@intel.com
L
Linus Torvalds 已提交
11 12 13 14 15 16
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

17
#include <linux/stackprotector.h>
A
Ashok Raj 已提交
18
#include <linux/cpu.h>
L
Linus Torvalds 已提交
19 20
#include <linux/errno.h>
#include <linux/sched.h>
21
#include <linux/fs.h>
L
Linus Torvalds 已提交
22 23 24 25 26 27 28
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
29
#include <linux/utsname.h>
L
Linus Torvalds 已提交
30
#include <linux/delay.h>
31
#include <linux/module.h>
L
Linus Torvalds 已提交
32
#include <linux/ptrace.h>
A
Andi Kleen 已提交
33
#include <linux/notifier.h>
34
#include <linux/kprobes.h>
35
#include <linux/kdebug.h>
36
#include <linux/tick.h>
37
#include <linux/prctl.h>
38 39
#include <linux/uaccess.h>
#include <linux/io.h>
40
#include <linux/ftrace.h>
41
#include <linux/dmi.h>
L
Linus Torvalds 已提交
42 43 44 45 46 47 48 49 50 51

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
A
Andi Kleen 已提交
52
#include <asm/idle.h>
53
#include <asm/syscalls.h>
54
#include <asm/ds.h>
55 56
#include <asm/debugreg.h>
#include <asm/hw_breakpoint.h>
L
Linus Torvalds 已提交
57 58 59

asmlinkage extern void ret_from_fork(void);

60
DEFINE_PER_CPU(unsigned long, old_rsp);
61
static DEFINE_PER_CPU(unsigned char, is_idle);
62

L
Linus Torvalds 已提交
63 64
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

65
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
A
Andi Kleen 已提交
66 67 68

void idle_notifier_register(struct notifier_block *n)
{
69
	atomic_notifier_chain_register(&idle_notifier, n);
A
Andi Kleen 已提交
70
}
71 72 73 74 75 76 77
EXPORT_SYMBOL_GPL(idle_notifier_register);

/* Remove notifier block @n from the idle_notifier chain. */
void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
A
Andi Kleen 已提交
78 79 80

void enter_idle(void)
{
81
	percpu_write(is_idle, 1);
82
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
A
Andi Kleen 已提交
83 84 85 86
}

static void __exit_idle(void)
{
87
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
A
Andi Kleen 已提交
88
		return;
89
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
A
Andi Kleen 已提交
90 91 92 93 94
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
A
Andi Kleen 已提交
95 96
	/* idle loop has pid 0 */
	if (current->pid)
A
Andi Kleen 已提交
97 98 99 100
		return;
	__exit_idle();
}

A
Alex Nixon 已提交
101
#ifndef CONFIG_SMP
/*
 * Stub used when CONFIG_SMP is not set.  cpu_idle() calls play_dead()
 * only when cpu_is_offline() reports the current CPU offline; reaching
 * it in this configuration is treated as a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif
A
Ashok Raj 已提交
107

L
Linus Torvalds 已提交
108 109 110 111 112 113
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
P
Pavel Machek 已提交
114
void cpu_idle(void)
L
Linus Torvalds 已提交
115
{
116
	current_thread_info()->status |= TS_POLLING;
117 118

	/*
T
Tejun Heo 已提交
119 120 121 122 123
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
124
	 */
125 126
	boot_init_stack_canary();

L
Linus Torvalds 已提交
127 128
	/* endless idle loop with no priority at all */
	while (1) {
129
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
130 131 132
		while (!need_resched()) {

			rmb();
T
Thomas Gleixner 已提交
133

A
Ashok Raj 已提交
134 135
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
136 137 138 139 140 141
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
A
Andi Kleen 已提交
142
			enter_idle();
143 144
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
145
			pm_idle();
146
			start_critical_timings();
A
Andi Kleen 已提交
147 148 149
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
A
Andi Kleen 已提交
150
			__exit_idle();
L
Linus Torvalds 已提交
151 152
		}

153
		tick_nohz_restart_sched_tick();
154
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
155
		schedule();
156
		preempt_disable();
L
Linus Torvalds 已提交
157 158 159
	}
}

160
/* Prints also some state that isn't saved in the pt_regs */
161
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
162 163
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
164
	unsigned long d0, d1, d2, d3, d6, d7;
165 166
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
167
	const char *board;
L
Linus Torvalds 已提交
168 169 170

	printk("\n");
	print_modules();
171 172 173 174
	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
175
		current->pid, current->comm, print_tainted(),
176 177
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
178
		init_utsname()->version, board);
179
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
180
	printk_address(regs->ip, 1);
181 182 183
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
184
	       regs->ax, regs->bx, regs->cx);
185
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
186
	       regs->dx, regs->si, regs->di);
187
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
188
	       regs->bp, regs->r8, regs->r9);
189
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
190
	       regs->r10, regs->r11, regs->r12);
191
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
192
	       regs->r13, regs->r14, regs->r15);
L
Linus Torvalds 已提交
193

194 195 196
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
L
Linus Torvalds 已提交
197 198 199 200
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
201 202
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
L
Linus Torvalds 已提交
203

204 205
	if (!all)
		return;
L
Linus Torvalds 已提交
206

207 208 209 210
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();
L
Linus Torvalds 已提交
211

212
	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
213
	       fs, fsindex, gs, gsindex, shadowgs);
214 215 216 217
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);
218 219 220 221

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
222
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
223 224 225
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
226
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
L
Linus Torvalds 已提交
227 228 229 230
}

void show_regs(struct pt_regs *regs)
{
231
	printk(KERN_INFO "CPU %d:", smp_processor_id());
232
	__show_regs(regs, 1);
233
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
L
Linus Torvalds 已提交
234 235 236 237 238 239 240 241 242 243 244 245 246
}

/*
 * Final arch-specific checks for a dead task.
 *
 * A dead task whose mm still reports a non-zero LDT size is a bug: warn
 * and BUG().  If the task still has debugreg7 set, flush its hardware
 * breakpoint state.
 */
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm && dead_task->mm->context.size) {
		printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
		       dead_task->comm,
		       dead_task->mm->context.ldt,
		       dead_task->mm->context.size);
		BUG();
	}

	if (unlikely(dead_task->thread.debugreg7))
		flush_thread_hw_breakpoint(dead_task);
}

/*
 * Fill TLS slot @tls of task @t with a 32-bit, page-granular, usable
 * descriptor whose base is @addr and whose limit is 0xfffff.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};

	fill_ldt(&t->thread.tls_array[tls], &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
R
Roland McGrath 已提交
267
	return get_desc_base(&t->thread.tls_array[tls]);
L
Linus Torvalds 已提交
268 269 270 271 272 273 274 275 276 277 278
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * The only work done here is unlazy_fpu(@tsk).
 * NOTE(review): presumably this saves lazily-held FPU state into the
 * task so the copy sees it -- confirm against unlazy_fpu().
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

A
Alexey Dobriyan 已提交
279
int copy_thread(unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
280
		unsigned long unused,
281
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
282 283
{
	int err;
284
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
285 286
	struct task_struct *me = current;

287
	childregs = ((struct pt_regs *)
A
Al Viro 已提交
288
			(THREAD_SIZE + task_stack_page(p))) - 1;
L
Linus Torvalds 已提交
289 290
	*childregs = *regs;

291 292 293 294
	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;
L
Linus Torvalds 已提交
295

296 297 298
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;
L
Linus Torvalds 已提交
299

A
Al Viro 已提交
300
	set_tsk_thread_flag(p, TIF_FORK);
L
Linus Torvalds 已提交
301 302 303

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;
304
	p->thread.io_bitmap_ptr = NULL;
L
Linus Torvalds 已提交
305

306 307 308 309
	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
L
Linus Torvalds 已提交
310

311 312 313 314 315
	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG)))
		if (copy_thread_hw_breakpoint(me, p, clone_flags))
			goto out;

316
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
L
Linus Torvalds 已提交
317 318 319 320 321
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
322 323
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
324
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
325
	}
L
Linus Torvalds 已提交
326 327 328 329 330 331 332

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
R
Roland McGrath 已提交
333
			err = do_set_thread_area(p, -1,
334
				(struct user_desc __user *)childregs->si, 0);
335 336 337 338
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
L
Linus Torvalds 已提交
339 340
			goto out;
	}
341

M
Markus Metzger 已提交
342 343
	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
	p->thread.ds_ctx = NULL;
344 345 346 347

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
348 349 350 351 352 353
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
354 355 356
	if (err)
		flush_thread_hw_breakpoint(p);

L
Linus Torvalds 已提交
357 358 359
	return err;
}

I
Ingo Molnar 已提交
360 361 362
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
363 364 365
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
I
Ingo Molnar 已提交
366 367 368
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
369
	percpu_write(old_rsp, new_sp);
I
Ingo Molnar 已提交
370 371 372 373
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;
	set_fs(USER_DS);
374 375 376 377
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
378 379 380
}
EXPORT_SYMBOL_GPL(start_thread);

L
Linus Torvalds 已提交
381 382 383
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
384
 * This could still be optimized:
L
Linus Torvalds 已提交
385 386
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
387 388
 *
 * Kprobes not supported here. Set the probe on schedule instead.
389
 * Function graph tracer not supported too.
L
Linus Torvalds 已提交
390
 */
391
__notrace_funcgraph struct task_struct *
392
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
393
{
394 395
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
396
	int cpu = smp_processor_id();
L
Linus Torvalds 已提交
397
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
398
	unsigned fsindex, gsindex;
399 400 401 402 403 404 405 406
	bool preload_fpu;

	/*
	 * If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
L
Linus Torvalds 已提交
407

408
	/* we're going to use this soon, after a few expensive things */
409
	if (preload_fpu)
410
		prefetch(next->xstate);
411

L
Linus Torvalds 已提交
412 413 414
	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
415
	load_sp0(tss, next);
L
Linus Torvalds 已提交
416

417
	/*
L
Linus Torvalds 已提交
418 419 420
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
421
	savesegment(es, prev->es);
L
Linus Torvalds 已提交
422
	if (unlikely(next->es | prev->es))
423
		loadsegment(es, next->es);
424 425

	savesegment(ds, prev->ds);
L
Linus Torvalds 已提交
426 427 428
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

429 430 431 432 433 434 435 436 437

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

L
Linus Torvalds 已提交
438 439
	load_TLS(next, cpu);

440 441 442
	/* Must be after DS reload */
	unlazy_fpu(prev_p);

443 444 445 446
	/* Make sure cpu is ready for new context */
	if (preload_fpu)
		clts();

447 448 449 450 451 452 453
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
454
	arch_end_context_switch(next_p);
455

456
	/*
L
Linus Torvalds 已提交
457
	 * Switch FS and GS.
458 459 460 461
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
L
Linus Torvalds 已提交
462
	 */
463 464
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
465
		/*
466 467 468 469 470
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
471
			prev->fs = 0;
L
Linus Torvalds 已提交
472
	}
473 474 475 476 477 478 479 480
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
481
			prev->gs = 0;
L
Linus Torvalds 已提交
482
	}
483 484 485
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;
L
Linus Torvalds 已提交
486

487
	/*
488
	 * Switch the PDA and FPU contexts.
L
Linus Torvalds 已提交
489
	 */
490 491
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
492
	percpu_write(current_task, next_p);
493

494
	percpu_write(kernel_stack,
495
		  (unsigned long)task_stack_page(next_p) +
496
		  THREAD_SIZE - KERNEL_STACK_OFFSET);
L
Linus Torvalds 已提交
497 498

	/*
499
	 * Now maybe reload the debug registers and handle I/O bitmaps
L
Linus Torvalds 已提交
500
	 */
501 502
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
503
		__switch_to_xtra(prev_p, next_p, tss);
L
Linus Torvalds 已提交
504

505 506 507
	/*
	 * Preload the FPU context, now that we've determined that the
	 * task is likely to be using it. 
508
	 */
509 510
	if (preload_fpu)
		__math_state_restore();
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
	/*
	 * There's a problem with moving the arch_install_thread_hw_breakpoint()
	 * call before current is updated.  Suppose a kernel breakpoint is
	 * triggered in between the two, the hw-breakpoint handler will see that
	 * the 'current' task does not have TIF_DEBUG flag set and will think it
	 * is leftover from an old task (lazy switching) and will erase it. Then
	 * until the next context switch, no user-breakpoints will be installed.
	 *
	 * The real problem is that it's impossible to update both current and
	 * physical debug registers at the same instant, so there will always be
	 * a window in which they disagree and a breakpoint might get triggered.
	 * Since we use lazy switching, we are forced to assume that a
	 * disagreement means that current is correct and the exception is due
	 * to lazy debug register switching.
	 */
	if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
		arch_install_thread_hw_breakpoint(next_p);

L
Linus Torvalds 已提交
529 530 531 532 533 534
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
535
asmlinkage
L
Linus Torvalds 已提交
536
long sys_execve(char __user *name, char __user * __user *argv,
537
		char __user * __user *envp, struct pt_regs *regs)
L
Linus Torvalds 已提交
538 539
{
	long error;
540
	char *filename;
L
Linus Torvalds 已提交
541 542 543

	filename = getname(name);
	error = PTR_ERR(filename);
544
	if (IS_ERR(filename))
L
Linus Torvalds 已提交
545
		return error;
546
	error = do_execve(filename, argv, envp, regs);
L
Linus Torvalds 已提交
547 548 549 550 551 552 553 554 555
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
556
	clear_thread_flag(TIF_IA32);
L
Linus Torvalds 已提交
557 558 559 560

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
561
	   32bit childs are affected again. */
L
Linus Torvalds 已提交
562 563 564
	current->personality &= ~READ_IMPLIES_EXEC;
}

565 566 567
/*
 * clone() entry point: forwards to do_fork().  A zero @newsp means the
 * child keeps the caller's current stack pointer (regs->sp).
 */
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	unsigned long child_sp = newsp ? newsp : regs->sp;

	return do_fork(clone_flags, child_sp, regs, 0, parent_tid, child_tid);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
577
	u64 fp, ip;
L
Linus Torvalds 已提交
578 579
	int count = 0;

580 581
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
582
	stack = (unsigned long)task_stack_page(p);
583
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
L
Linus Torvalds 已提交
584
		return 0;
585
	fp = *(u64 *)(p->thread.sp);
586
	do {
587
		if (fp < (unsigned long)stack ||
588
		    fp >= (unsigned long)stack+THREAD_SIZE)
589
			return 0;
590 591 592
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
593 594
		fp = *(u64 *)fp;
	} while (count++ < 16);
L
Linus Torvalds 已提交
595 596 597 598
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
599 600
{
	int ret = 0;
L
Linus Torvalds 已提交
601 602 603
	int doit = task == current;
	int cpu;

604
	switch (code) {
L
Linus Torvalds 已提交
605
	case ARCH_SET_GS:
606
		if (addr >= TASK_SIZE_OF(task))
607
			return -EPERM;
L
Linus Torvalds 已提交
608
		cpu = get_cpu();
609
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
610
		   switch. */
611 612 613
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
L
Linus Torvalds 已提交
614
				load_TLS(&task->thread, cpu);
615
				load_gs_index(GS_TLS_SEL);
L
Linus Torvalds 已提交
616
			}
617
			task->thread.gsindex = GS_TLS_SEL;
L
Linus Torvalds 已提交
618
			task->thread.gs = 0;
619
		} else {
L
Linus Torvalds 已提交
620 621 622
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
623 624
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
625
			}
L
Linus Torvalds 已提交
626 627 628 629 630 631
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
632
		if (addr >= TASK_SIZE_OF(task))
633
			return -EPERM;
L
Linus Torvalds 已提交
634
		cpu = get_cpu();
635
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
636
		   switch. */
637
		if (addr <= 0xffffffff) {
L
Linus Torvalds 已提交
638
			set_32bit_tls(task, FS_TLS, addr);
639 640
			if (doit) {
				load_TLS(&task->thread, cpu);
641
				loadsegment(fs, FS_TLS_SEL);
L
Linus Torvalds 已提交
642 643 644
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
645
		} else {
L
Linus Torvalds 已提交
646 647 648 649 650
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
651
				loadsegment(fs, 0);
652
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
L
Linus Torvalds 已提交
653 654 655 656
			}
		}
		put_cpu();
		break;
657 658
	case ARCH_GET_FS: {
		unsigned long base;
L
Linus Torvalds 已提交
659 660
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
661
		else if (doit)
L
Linus Torvalds 已提交
662
			rdmsrl(MSR_FS_BASE, base);
663
		else
L
Linus Torvalds 已提交
664
			base = task->thread.fs;
665 666
		ret = put_user(base, (unsigned long __user *)addr);
		break;
L
Linus Torvalds 已提交
667
	}
668
	case ARCH_GET_GS: {
L
Linus Torvalds 已提交
669
		unsigned long base;
670
		unsigned gsindex;
L
Linus Torvalds 已提交
671 672
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
673
		else if (doit) {
674
			savesegment(gs, gsindex);
675 676 677 678
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
679
		} else
L
Linus Torvalds 已提交
680
			base = task->thread.gs;
681
		ret = put_user(base, (unsigned long __user *)addr);
L
Linus Torvalds 已提交
682 683 684 685 686 687
		break;
	}

	default:
		ret = -EINVAL;
		break;
688
	}
L
Linus Torvalds 已提交
689

690 691
	return ret;
}
L
Linus Torvalds 已提交
692 693 694 695 696 697

/* arch_prctl() entry point: applies do_arch_prctl() to the current task. */
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}