/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

61 62 63
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

64
DEFINE_PER_CPU(unsigned long, old_rsp);
65
static DEFINE_PER_CPU(unsigned char, is_idle);
66

L
Linus Torvalds 已提交
67 68
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

69
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
A
Andi Kleen 已提交
70 71 72

void idle_notifier_register(struct notifier_block *n)
{
73
	atomic_notifier_chain_register(&idle_notifier, n);
A
Andi Kleen 已提交
74
}
75 76 77 78 79 80 81
EXPORT_SYMBOL_GPL(idle_notifier_register);

/* Remove @n from the idle notifier chain. */
void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);

void enter_idle(void)
{
85
	percpu_write(is_idle, 1);
86
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
A
Andi Kleen 已提交
87 88 89 90
}

static void __exit_idle(void)
{
91
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
A
Andi Kleen 已提交
92
		return;
93
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
A
Andi Kleen 已提交
94 95 96 97 98
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
A
Andi Kleen 已提交
99 100
	/* idle loop has pid 0 */
	if (current->pid)
A
Andi Kleen 已提交
101 102 103 104
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
/*
 * Without CONFIG_SMP this local stub is used; cpu_idle() only calls it
 * when cpu_is_offline() reports the current CPU offline, and reaching
 * it is treated as a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
#endif

L
Linus Torvalds 已提交
112 113 114 115 116 117
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
P
Pavel Machek 已提交
118
void cpu_idle(void)
L
Linus Torvalds 已提交
119
{
120
	current_thread_info()->status |= TS_POLLING;
121 122

	/*
T
Tejun Heo 已提交
123 124 125 126 127
	 * If we're the non-boot CPU, nothing set the stack canary up
	 * for us.  CPU0 already has it initialized but no harm in
	 * doing it again.  This is a good place for updating it, as
	 * we wont ever return from this function (so the invalid
	 * canaries already on the stack wont ever trigger).
128
	 */
129 130
	boot_init_stack_canary();

L
Linus Torvalds 已提交
131 132
	/* endless idle loop with no priority at all */
	while (1) {
133
		tick_nohz_stop_sched_tick(1);
L
Linus Torvalds 已提交
134 135 136
		while (!need_resched()) {

			rmb();
T
Thomas Gleixner 已提交
137

A
Ashok Raj 已提交
138 139
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
140 141 142 143 144 145
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
A
Andi Kleen 已提交
146
			enter_idle();
147 148
			/* Don't trace irqs off for idle */
			stop_critical_timings();
T
Thomas Gleixner 已提交
149
			pm_idle();
150
			start_critical_timings();
A
Andi Kleen 已提交
151 152 153
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
A
Andi Kleen 已提交
154
			__exit_idle();
L
Linus Torvalds 已提交
155 156
		}

157
		tick_nohz_restart_sched_tick();
158
		preempt_enable_no_resched();
L
Linus Torvalds 已提交
159
		schedule();
160
		preempt_disable();
L
Linus Torvalds 已提交
161 162 163
	}
}

164
/* Prints also some state that isn't saved in the pt_regs */
165
void __show_regs(struct pt_regs *regs, int all)
L
Linus Torvalds 已提交
166 167
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
168
	unsigned long d0, d1, d2, d3, d6, d7;
169 170
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
171
	const char *board;
L
Linus Torvalds 已提交
172 173 174

	printk("\n");
	print_modules();
175 176 177 178
	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
179
		current->pid, current->comm, print_tainted(),
180 181
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
182
		init_utsname()->version, board);
183
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
184
	printk_address(regs->ip, 1);
185 186 187
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
188
	       regs->ax, regs->bx, regs->cx);
189
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
190
	       regs->dx, regs->si, regs->di);
191
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
192
	       regs->bp, regs->r8, regs->r9);
193
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
194
	       regs->r10, regs->r11, regs->r12);
195
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
196
	       regs->r13, regs->r14, regs->r15);
L
Linus Torvalds 已提交
197

198 199 200
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
L
Linus Torvalds 已提交
201 202 203 204
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
205 206
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
L
Linus Torvalds 已提交
207

208 209
	if (!all)
		return;
L
Linus Torvalds 已提交
210

211 212 213 214
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();
L
Linus Torvalds 已提交
215

216
	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
217
	       fs, fsindex, gs, gsindex, shadowgs);
218 219 220 221
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);
222 223 224 225

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
226
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
227 228 229
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
230
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
L
Linus Torvalds 已提交
231 232 233 234
}

void show_regs(struct pt_regs *regs)
{
235
	printk(KERN_INFO "CPU %d:", smp_processor_id());
236
	__show_regs(regs, 1);
237
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
L
Linus Torvalds 已提交
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
}

/*
 * Sanity check run when @dead_task is released: a task that still has
 * an mm must not have a non-empty LDT left in its mm context.  If it
 * does, print a warning and BUG().
 */
void release_thread(struct task_struct *dead_task)
{
	/* No mm: nothing to check. */
	if (!dead_task->mm)
		return;

	/* A zero size means no LDT is left. */
	if (!dead_task->mm->context.size)
		return;

	printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
	       dead_task->comm,
	       dead_task->mm->context.ldt,
	       dead_task->mm->context.size);
	BUG();
}

/*
 * Fill slot @tls of @t's TLS descriptor array with a 32-bit,
 * page-granular segment descriptor whose base is @addr and whose limit
 * is 0xfffff.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
R
Roland McGrath 已提交
269
	return get_desc_base(&t->thread.tls_array[tls]);
L
Linus Torvalds 已提交
270 271 272 273 274 275 276 277 278 279 280
}

/*
 * Hook invoked on @tsk before a new thread is allocated and the
 * current task is copied into it.  The only work done here is
 * unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

281
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
L
Linus Torvalds 已提交
282
		unsigned long unused,
283
	struct task_struct *p, struct pt_regs *regs)
L
Linus Torvalds 已提交
284 285
{
	int err;
286
	struct pt_regs *childregs;
L
Linus Torvalds 已提交
287 288
	struct task_struct *me = current;

289
	childregs = ((struct pt_regs *)
A
Al Viro 已提交
290
			(THREAD_SIZE + task_stack_page(p))) - 1;
L
Linus Torvalds 已提交
291 292
	*childregs = *regs;

293 294 295 296
	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;
L
Linus Torvalds 已提交
297

298 299 300
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;
L
Linus Torvalds 已提交
301

A
Al Viro 已提交
302
	set_tsk_thread_flag(p, TIF_FORK);
L
Linus Torvalds 已提交
303 304 305 306

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

307 308 309 310
	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
L
Linus Torvalds 已提交
311

312
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
L
Linus Torvalds 已提交
313 314 315 316 317
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
318 319
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
320
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
321
	}
L
Linus Torvalds 已提交
322 323 324 325 326 327 328

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
R
Roland McGrath 已提交
329
			err = do_set_thread_area(p, -1,
330
				(struct user_desc __user *)childregs->si, 0);
331 332 333 334
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
L
Linus Torvalds 已提交
335 336
			goto out;
	}
337 338 339 340 341 342

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

L
Linus Torvalds 已提交
343 344 345 346 347 348 349 350 351
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

I
Ingo Molnar 已提交
352 353 354
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
355 356 357
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
I
Ingo Molnar 已提交
358 359 360
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
361
	percpu_write(old_rsp, new_sp);
I
Ingo Molnar 已提交
362 363 364 365
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;
	set_fs(USER_DS);
366 367 368 369
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
I
Ingo Molnar 已提交
370 371 372
}
EXPORT_SYMBOL_GPL(start_thread);

L
Linus Torvalds 已提交
373 374 375
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
376
 * This could still be optimized:
L
Linus Torvalds 已提交
377 378
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
379 380
 *
 * Kprobes not supported here. Set the probe on schedule instead.
381
 * Function graph tracer not supported too.
L
Linus Torvalds 已提交
382
 */
383
__notrace_funcgraph struct task_struct *
384
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
L
Linus Torvalds 已提交
385
{
386 387
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
388
	int cpu = smp_processor_id();
L
Linus Torvalds 已提交
389
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
390
	unsigned fsindex, gsindex;
L
Linus Torvalds 已提交
391

392
	/* we're going to use this soon, after a few expensive things */
393
	if (next_p->fpu_counter > 5)
394
		prefetch(next->xstate);
395

L
Linus Torvalds 已提交
396 397 398
	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
399
	load_sp0(tss, next);
L
Linus Torvalds 已提交
400

401
	/*
L
Linus Torvalds 已提交
402 403 404
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
405
	savesegment(es, prev->es);
L
Linus Torvalds 已提交
406
	if (unlikely(next->es | prev->es))
407
		loadsegment(es, next->es);
408 409

	savesegment(ds, prev->ds);
L
Linus Torvalds 已提交
410 411 412
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

413 414 415 416 417 418 419 420 421

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

L
Linus Torvalds 已提交
422 423
	load_TLS(next, cpu);

424 425 426 427 428 429 430 431 432
	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

433
	/*
L
Linus Torvalds 已提交
434
	 * Switch FS and GS.
435 436 437 438
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
L
Linus Torvalds 已提交
439
	 */
440 441
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
442
		/*
443 444 445 446 447
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
448
			prev->fs = 0;
L
Linus Torvalds 已提交
449
	}
450 451 452 453 454 455 456 457
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
458
			prev->gs = 0;
L
Linus Torvalds 已提交
459
	}
460 461 462
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;
L
Linus Torvalds 已提交
463

A
Andi Kleen 已提交
464 465 466
	/* Must be after DS reload */
	unlazy_fpu(prev_p);

467
	/*
468
	 * Switch the PDA and FPU contexts.
L
Linus Torvalds 已提交
469
	 */
470 471
	prev->usersp = percpu_read(old_rsp);
	percpu_write(old_rsp, next->usersp);
472
	percpu_write(current_task, next_p);
473

474
	percpu_write(kernel_stack,
475
		  (unsigned long)task_stack_page(next_p) +
476
		  THREAD_SIZE - KERNEL_STACK_OFFSET);
L
Linus Torvalds 已提交
477 478

	/*
479
	 * Now maybe reload the debug registers and handle I/O bitmaps
L
Linus Torvalds 已提交
480
	 */
481 482
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
483
		__switch_to_xtra(prev_p, next_p, tss);
L
Linus Torvalds 已提交
484

485 486 487
	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
488 489 490
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
491
	 */
492
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
493
		math_state_restore();
L
Linus Torvalds 已提交
494 495 496 497 498 499
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
500
asmlinkage
L
Linus Torvalds 已提交
501
long sys_execve(char __user *name, char __user * __user *argv,
502
		char __user * __user *envp, struct pt_regs *regs)
L
Linus Torvalds 已提交
503 504
{
	long error;
505
	char *filename;
L
Linus Torvalds 已提交
506 507 508

	filename = getname(name);
	error = PTR_ERR(filename);
509
	if (IS_ERR(filename))
L
Linus Torvalds 已提交
510
		return error;
511
	error = do_execve(filename, argv, envp, regs);
L
Linus Torvalds 已提交
512 513 514 515 516 517 518 519 520
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
521
	clear_thread_flag(TIF_IA32);
L
Linus Torvalds 已提交
522 523 524 525

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
526
	   32bit childs are affected again. */
L
Linus Torvalds 已提交
527 528 529
	current->personality &= ~READ_IMPLIES_EXEC;
}

530 531 532
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
L
Linus Torvalds 已提交
533 534
{
	if (!newsp)
535
		newsp = regs->sp;
L
Linus Torvalds 已提交
536 537 538 539 540 541
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
542
	u64 fp, ip;
L
Linus Torvalds 已提交
543 544
	int count = 0;

545 546
	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
A
Al Viro 已提交
547
	stack = (unsigned long)task_stack_page(p);
548
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
L
Linus Torvalds 已提交
549
		return 0;
550
	fp = *(u64 *)(p->thread.sp);
551
	do {
552
		if (fp < (unsigned long)stack ||
553
		    fp >= (unsigned long)stack+THREAD_SIZE)
554
			return 0;
555 556 557
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
558 559
		fp = *(u64 *)fp;
	} while (count++ < 16);
L
Linus Torvalds 已提交
560 561 562 563
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
564 565
{
	int ret = 0;
L
Linus Torvalds 已提交
566 567 568
	int doit = task == current;
	int cpu;

569
	switch (code) {
L
Linus Torvalds 已提交
570
	case ARCH_SET_GS:
571
		if (addr >= TASK_SIZE_OF(task))
572
			return -EPERM;
L
Linus Torvalds 已提交
573
		cpu = get_cpu();
574
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
575
		   switch. */
576 577 578
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
L
Linus Torvalds 已提交
579
				load_TLS(&task->thread, cpu);
580
				load_gs_index(GS_TLS_SEL);
L
Linus Torvalds 已提交
581
			}
582
			task->thread.gsindex = GS_TLS_SEL;
L
Linus Torvalds 已提交
583
			task->thread.gs = 0;
584
		} else {
L
Linus Torvalds 已提交
585 586 587
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
588 589
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
590
			}
L
Linus Torvalds 已提交
591 592 593 594 595 596
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
597
		if (addr >= TASK_SIZE_OF(task))
598
			return -EPERM;
L
Linus Torvalds 已提交
599
		cpu = get_cpu();
600
		/* handle small bases via the GDT because that's faster to
L
Linus Torvalds 已提交
601
		   switch. */
602
		if (addr <= 0xffffffff) {
L
Linus Torvalds 已提交
603
			set_32bit_tls(task, FS_TLS, addr);
604 605
			if (doit) {
				load_TLS(&task->thread, cpu);
606
				loadsegment(fs, FS_TLS_SEL);
L
Linus Torvalds 已提交
607 608 609
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
610
		} else {
L
Linus Torvalds 已提交
611 612 613 614 615
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
616
				loadsegment(fs, 0);
617
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
L
Linus Torvalds 已提交
618 619 620 621
			}
		}
		put_cpu();
		break;
622 623
	case ARCH_GET_FS: {
		unsigned long base;
L
Linus Torvalds 已提交
624 625
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
626
		else if (doit)
L
Linus Torvalds 已提交
627
			rdmsrl(MSR_FS_BASE, base);
628
		else
L
Linus Torvalds 已提交
629
			base = task->thread.fs;
630 631
		ret = put_user(base, (unsigned long __user *)addr);
		break;
L
Linus Torvalds 已提交
632
	}
633
	case ARCH_GET_GS: {
L
Linus Torvalds 已提交
634
		unsigned long base;
635
		unsigned gsindex;
L
Linus Torvalds 已提交
636 637
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
638
		else if (doit) {
639
			savesegment(gs, gsindex);
640 641 642 643
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
644
		} else
L
Linus Torvalds 已提交
645
			base = task->thread.gs;
646
		ret = put_user(base, (unsigned long __user *)addr);
L
Linus Torvalds 已提交
647 648 649 650 651 652
		break;
	}

	default:
		ret = -EINVAL;
		break;
653
	}
L
Linus Torvalds 已提交
654

655 656
	return ret;
}
L
Linus Torvalds 已提交
657 658 659 660 661 662 663 664

/*
 * arch_prctl() entry point: applies @code / @addr to the calling task
 * and returns whatever do_arch_prctl() reports.
 */
long sys_arch_prctl(int code, unsigned long addr)
{
	long result = do_arch_prctl(current, code, addr);

	return result;
}

unsigned long arch_align_stack(unsigned long sp)
{
665
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
666 667 668
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
J
Jiri Kosina 已提交
669 670 671 672 673 674

/*
 * Pick a randomized value from the 0x02000000-byte window that starts
 * at mm->brk.  If randomize_range() returns 0, mm->brk itself is
 * returned instead.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long window_start = mm->brk;
	unsigned long window_end = window_start + 0x02000000;
	unsigned long picked = randomize_range(window_start, window_end, 0);

	if (picked)
		return picked;
	return mm->brk;
}