process.c 10.1 KB
Newer Older
J
Jeff Dike 已提交
1
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright 2003 PathScale, Inc.
 * Licensed under the GPL
 */

J
Jeff Dike 已提交
7 8 9
#include "linux/stddef.h"
#include "linux/err.h"
#include "linux/hardirq.h"
L
Linus Torvalds 已提交
10
#include "linux/mm.h"
J
Jeff Dike 已提交
11
#include "linux/personality.h"
L
Linus Torvalds 已提交
12 13 14
#include "linux/proc_fs.h"
#include "linux/ptrace.h"
#include "linux/random.h"
J
Jeff Dike 已提交
15
#include "linux/sched.h"
J
Jeff Dike 已提交
16
#include "linux/tick.h"
J
Jeff Dike 已提交
17
#include "linux/threads.h"
L
Linus Torvalds 已提交
18 19
#include "asm/pgtable.h"
#include "asm/uaccess.h"
J
Jeff Dike 已提交
20
#include "as-layout.h"
J
Jeff Dike 已提交
21
#include "kern_util.h"
L
Linus Torvalds 已提交
22
#include "os.h"
23
#include "skas.h"
J
Jeff Dike 已提交
24
#include "tlb.h"
L
Linus Torvalds 已提交
25

J
Jeff Dike 已提交
26 27
/*
 * This is a per-cpu array.  A processor only modifies its entry and it only
 * cares about its entry, so it's OK if another processor is modifying its
 * entry.
 */
struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };

J
Jeff Dike 已提交
33
static inline int external_pid(struct task_struct *task)
L
Linus Torvalds 已提交
34
{
35
	/* FIXME: Need to look up userspace_pid by cpu */
J
Jeff Dike 已提交
36
	return userspace_pid[0];
L
Linus Torvalds 已提交
37 38 39 40 41 42
}

/*
 * Find the index of the cpu_tasks entry whose pid matches.
 *
 * Only the first ncpus entries are searched.  Returns the index of the
 * first match, or -1 if no entry has that pid.
 */
int pid_to_processor_id(int pid)
{
	int slot = 0;

	while (slot < ncpus) {
		if (cpu_tasks[slot].pid == pid)
			return slot;
		slot++;
	}

	return -1;
}

/*
 * Release a stack of 2^order pages by handing it to free_pages().
 * stack is the address passed through unchanged to free_pages().
 */
void free_stack(unsigned long stack, int order)
{
	free_pages(stack, order);
}

unsigned long alloc_stack(int order, int atomic)
{
	unsigned long page;
A
Al Viro 已提交
58
	gfp_t flags = GFP_KERNEL;
L
Linus Torvalds 已提交
59

60 61
	if (atomic)
		flags = GFP_ATOMIC;
L
Linus Torvalds 已提交
62
	page = __get_free_pages(flags, order);
63

J
Jeff Dike 已提交
64
	return page;
L
Linus Torvalds 已提交
65 66 67 68 69 70 71 72
}

int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
	int pid;

	current->thread.request.u.thread.proc = fn;
	current->thread.request.u.thread.arg = arg;
J
Jeff Dike 已提交
73 74
	pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
		      &current->thread.regs, 0, NULL, NULL);
J
Jeff Dike 已提交
75
	return pid;
L
Linus Torvalds 已提交
76 77
}

J
Jeff Dike 已提交
78
static inline void set_current(struct task_struct *task)
L
Linus Torvalds 已提交
79
{
A
Al Viro 已提交
80
	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
L
Linus Torvalds 已提交
81 82 83
		{ external_pid(task), task });
}

84 85
extern void arch_switch_to(struct task_struct *from, struct task_struct *to);

L
Linus Torvalds 已提交
86 87
void *_switch_to(void *prev, void *next, void *last)
{
J
Jeff Dike 已提交
88 89
	struct task_struct *from = prev;
	struct task_struct *to= next;
90

J
Jeff Dike 已提交
91 92
	to->thread.prev_sched = from;
	set_current(to);
93

94
	do {
J
Jeff Dike 已提交
95
		current->thread.saved_task = NULL;
96 97 98 99 100 101

		switch_threads(&from->thread.switch_buf,
			       &to->thread.switch_buf);

		arch_switch_to(current->thread.prev_sched, current);

J
Jeff Dike 已提交
102
		if (current->thread.saved_task)
103 104 105 106
			show_regs(&(current->thread.regs));
		next= current->thread.saved_task;
		prev= current;
	} while(current->thread.saved_task);
107

J
Jeff Dike 已提交
108
	return current->thread.prev_sched;
109

L
Linus Torvalds 已提交
110 111 112 113
}

void interrupt_end(void)
{
J
Jeff Dike 已提交
114
	if (need_resched())
J
Jeff Dike 已提交
115
		schedule();
J
Jeff Dike 已提交
116
	if (test_tsk_thread_flag(current, TIF_SIGPENDING))
J
Jeff Dike 已提交
117
		do_signal();
L
Linus Torvalds 已提交
118 119 120 121 122
}

/* Intentionally empty: this implementation does nothing on thread exit. */
void exit_thread(void)
{
}
J
Jeff Dike 已提交
123

L
Linus Torvalds 已提交
124 125
void *get_current(void)
{
J
Jeff Dike 已提交
126
	return current;
L
Linus Torvalds 已提交
127 128
}

129 130
extern void schedule_tail(struct task_struct *prev);

J
Jeff Dike 已提交
131 132
/*
 * This is called magically, by its address being stuffed in a jmp_buf
 * and being longjmp-d to.
 *
 * It finishes the switch from the previous task (if any), then runs the
 * function stored in the current task's thread request.
 */
void new_thread_handler(void)
{
	int (*thread_fn)(void *);
	void *thread_arg;
	int execed;

	if (current->thread.prev_sched != NULL)
		schedule_tail(current->thread.prev_sched);
	current->thread.prev_sched = NULL;

	thread_fn = current->thread.request.u.thread.proc;
	thread_arg = current->thread.request.u.thread.arg;

	/*
	 * The return value is 1 if the kernel thread execs a process,
	 * 0 if it just exits
	 */
	execed = run_kernel_thread(thread_fn, thread_arg,
				   &current->thread.exec_buf);
	if (execed == 1) {
		/* Handle any immediate reschedules or signals */
		interrupt_end();
		userspace(&current->thread.regs.regs);
	} else {
		do_exit(0);
	}
}

/*
 * Called magically, see new_thread_handler above.
 *
 * copy_thread() installs this as the entry point when the task was
 * created while the parent had thread.forking set.
 */
void fork_handler(void)
{
	force_flush_all();

	/* A forked task must have been switched to from some other task */
	if (current->thread.prev_sched == NULL) {
		panic("blech");
	}

	schedule_tail(current->thread.prev_sched);

	/*
	 * XXX: if interrupt_end() calls schedule, this call to
	 * arch_switch_to isn't needed. We could want to apply this to
	 * improve performance. -bb
	 */
	arch_switch_to(current->thread.prev_sched, current);

	current->thread.prev_sched = NULL;

	/* Handle any immediate reschedules or signals */
	interrupt_end();

	userspace(&current->thread.regs.regs);
}

L
Linus Torvalds 已提交
184
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
J
Jeff Dike 已提交
185
		unsigned long stack_top, struct task_struct * p,
L
Linus Torvalds 已提交
186 187
		struct pt_regs *regs)
{
188 189
	void (*handler)(void);
	int ret = 0;
190

L
Linus Torvalds 已提交
191
	p->thread = (struct thread_struct) INIT_THREAD;
192

J
Jeff Dike 已提交
193
	if (current->thread.forking) {
194 195
	  	memcpy(&p->thread.regs.regs, &regs->regs,
		       sizeof(p->thread.regs.regs));
196
		REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.gp, 0);
J
Jeff Dike 已提交
197
		if (sp != 0)
198
			REGS_SP(p->thread.regs.regs.gp) = sp;
199

200
		handler = fork_handler;
201

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
		arch_copy_thread(&current->thread.arch, &p->thread.arch);
	}
	else {
		init_thread_registers(&p->thread.regs.regs);
		p->thread.request.u.thread = current->thread.request.u.thread;
		handler = new_thread_handler;
	}

	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);

	if (current->thread.forking) {
		clear_flushed_tls(p);

		/*
		 * Set a new TLS for the child thread?
		 */
		if (clone_flags & CLONE_SETTLS)
			ret = arch_copy_tls(p);
	}
221 222

	return ret;
L
Linus Torvalds 已提交
223 224 225 226 227 228 229
}

void initial_thread_cb(void (*proc)(void *), void *arg)
{
	int save_kmalloc_ok = kmalloc_ok;

	kmalloc_ok = 0;
J
Jeff Dike 已提交
230
	initial_thread_cb_skas(proc, arg);
L
Linus Torvalds 已提交
231 232
	kmalloc_ok = save_kmalloc_ok;
}
J
Jeff Dike 已提交
233

L
Linus Torvalds 已提交
234 235
/*
 * Endless idle loop with no priority at all.  Each pass reschedules if
 * needed, then sleeps for the interval returned by disable_timer() with
 * the scheduler tick stopped.  Never returns.
 */
void default_idle(void)
{
	unsigned long long sleep_nsecs;

	for (;;) {
		/*
		 * although we are an idle CPU, we do not want to
		 * get into the scheduler unnecessarily.
		 */
		if (need_resched()) {
			schedule();
		}

		tick_nohz_stop_sched_tick();
		sleep_nsecs = disable_timer();
		idle_sleep(sleep_nsecs);
		tick_nohz_restart_sched_tick();
	}
}

void cpu_idle(void)
{
257 258
	cpu_tasks[current_thread->cpu].pid = os_getpid();
	default_idle();
L
Linus Torvalds 已提交
259 260
}

J
Jeff Dike 已提交
261
void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
L
Linus Torvalds 已提交
262 263 264 265 266 267
		      pte_t *pte_out)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
H
Hugh Dickins 已提交
268
	pte_t ptent;
L
Linus Torvalds 已提交
269

J
Jeff Dike 已提交
270
	if (task->mm == NULL)
J
Jeff Dike 已提交
271
		return ERR_PTR(-EINVAL);
L
Linus Torvalds 已提交
272
	pgd = pgd_offset(task->mm, addr);
J
Jeff Dike 已提交
273
	if (!pgd_present(*pgd))
J
Jeff Dike 已提交
274
		return ERR_PTR(-EINVAL);
L
Linus Torvalds 已提交
275 276

	pud = pud_offset(pgd, addr);
J
Jeff Dike 已提交
277
	if (!pud_present(*pud))
J
Jeff Dike 已提交
278
		return ERR_PTR(-EINVAL);
L
Linus Torvalds 已提交
279 280

	pmd = pmd_offset(pud, addr);
J
Jeff Dike 已提交
281
	if (!pmd_present(*pmd))
J
Jeff Dike 已提交
282
		return ERR_PTR(-EINVAL);
L
Linus Torvalds 已提交
283 284

	pte = pte_offset_kernel(pmd, addr);
H
Hugh Dickins 已提交
285
	ptent = *pte;
J
Jeff Dike 已提交
286
	if (!pte_present(ptent))
J
Jeff Dike 已提交
287
		return ERR_PTR(-EINVAL);
L
Linus Torvalds 已提交
288

J
Jeff Dike 已提交
289
	if (pte_out != NULL)
H
Hugh Dickins 已提交
290
		*pte_out = ptent;
J
Jeff Dike 已提交
291
	return (void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK);
L
Linus Torvalds 已提交
292 293 294 295 296
}

/*
 * Return a pointer to the current task's argument area (mm->arg_start),
 * or the string "(Unknown)" when it cannot be resolved.
 *
 * With CONFIG_SMP or CONFIG_HIGHMEM the lookup is not attempted at all.
 * Otherwise "(Unknown)" is returned if the task has no mm or if the
 * address translation fails.
 */
char *current_cmd(void)
{
#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM)
	return "(Unknown)";
#else
	void *addr;

	/* Without an mm there is no arg_start to read */
	if (current->mm == NULL)
		return "(Unknown)";

	addr = um_virt_to_phys(current, current->mm->arg_start, NULL);
	return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr);
#endif
}

/* Intentionally empty: nothing is written to u and regs is not examined. */
void dump_thread(struct pt_regs *regs, struct user *u)
{
}

308 309 310
/*
 * Return non-zero when the current context is atomic, has interrupts
 * disabled, or is in interrupt context.
 */
int __cant_sleep(void)
{
	if (in_atomic())
		return 1;
	if (irqs_disabled())
		return 1;
	/* Is in_interrupt() really needed? */
	if (in_interrupt())
		return 1;
	return 0;
}

int user_context(unsigned long sp)
{
	unsigned long stack;

	stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER);
J
Jeff Dike 已提交
318
	return stack != (unsigned long) current_thread;
L
Linus Torvalds 已提交
319 320 321 322 323 324 325 326 327 328 329 330 331
}

extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end;

/*
 * Invoke every exitcall between __uml_exitcall_begin and
 * __uml_exitcall_end, walking from the last entry back to the first.
 */
void do_uml_exitcalls(void)
{
	exitcall_t *call = &__uml_exitcall_end;

	while (call > &__uml_exitcall_begin) {
		call--;
		(*call)();
	}
}

W
WANG Cong 已提交
332
char *uml_strdup(const char *string)
L
Linus Torvalds 已提交
333
{
334
	return kstrdup(string, GFP_KERNEL);
L
Linus Torvalds 已提交
335 336 337 338
}

/*
 * Function wrapper around copy_to_user(): copy size bytes from the kernel
 * buffer from to the user buffer to.  Returns copy_to_user()'s result.
 */
int copy_to_user_proc(void __user *to, void *from, int size)
{
	int ret = copy_to_user(to, from, size);

	return ret;
}

/*
 * Function wrapper around copy_from_user(): copy size bytes from the user
 * buffer from to the kernel buffer to.  Returns copy_from_user()'s result.
 */
int copy_from_user_proc(void *to, void __user *from, int size)
{
	int ret = copy_from_user(to, from, size);

	return ret;
}

/*
 * Function wrapper around clear_user() for size bytes at the user buffer
 * buf.  Returns clear_user()'s result.
 */
int clear_user_proc(void __user *buf, int size)
{
	int ret = clear_user(buf, size);

	return ret;
}

/*
 * Function wrapper around strlen_user() for the user string str.
 * Returns strlen_user()'s result.
 */
int strlen_user_proc(char __user *str)
{
	int len = strlen_user(str);

	return len;
}

/*
 * With CONFIG_SMP, run the IPI handler for the current CPU and return 1
 * on any CPU other than 0.  Returns 0 on CPU 0 and in non-SMP builds.
 */
int smp_sigio_handler(void)
{
#ifdef CONFIG_SMP
	int this_cpu = current_thread->cpu;

	IPI_handler(this_cpu);
	if (this_cpu != 0) {
		return 1;
	}
#endif
	return 0;
}

int cpu(void)
{
J
Jeff Dike 已提交
370
	return current_thread->cpu;
L
Linus Torvalds 已提交
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
}

static atomic_t using_sysemu = ATOMIC_INIT(0);
int sysemu_supported;

/*
 * Set the sysemu mode to value.  Requests greater than sysemu_supported
 * are silently ignored.
 */
void set_using_sysemu(int value)
{
	if (value <= sysemu_supported)
		atomic_set(&using_sysemu, value);
}

/* Return the current sysemu mode, read atomically from using_sysemu. */
int get_using_sysemu(void)
{
	return atomic_read(&using_sysemu);
}

/*
 * read_proc handler for the sysemu proc entry: format the current sysemu
 * mode as a decimal number followed by a newline.
 *
 * buf/size describe the output buffer; *eof is set to 1 when the whole
 * text fitted.  start, offset and data are unused.
 *
 * Returns the number of characters placed in buf, not counting the
 * terminating NUL.  Nothing is written and 0 is returned when size is not
 * positive, so buf is never measured without having been filled in.
 */
static int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int *eof, void *data)
{
	int len;

	if (size <= 0)
		return 0;

	len = snprintf(buf, size, "%d\n", get_using_sysemu());
	if (len < size) {
		/* No overflow */
		*eof = 1;
		return len;
	}

	/* Truncated: only size - 1 characters plus the NUL were stored */
	return size - 1;
}

A
Al Viro 已提交
397
/*
 * write_proc handler for the sysemu proc entry.
 *
 * Only the first byte written is examined: '0', '1' or '2' selects the
 * corresponding sysemu mode via set_using_sysemu(); any other character is
 * ignored.  file and data are unused.
 *
 * Returns count on success (including an empty write, which reads nothing
 * from the user buffer), or -EFAULT if the first byte cannot be copied.
 */
static int proc_write_sysemu(struct file *file,const char __user *buf, unsigned long count,void *data)
{
	char first;

	/* An empty write supplies no byte to read */
	if (count == 0)
		return 0;

	if (copy_from_user(&first, buf, 1))
		return -EFAULT;

	if (first >= '0' && first <= '2')
		set_using_sysemu(first - '0');

	/* We use the first char, but pretend to write everything */
	return count;
}

/*
 * Create the "sysemu" proc entry (mode 0600) under proc_root and attach
 * its read/write handlers.  Nothing is created when sysemu is not
 * supported.  A failure to create the entry is only logged; the function
 * always returns 0.
 */
int __init make_proc_sysemu(void)
{
	struct proc_dir_entry *entry;

	if (!sysemu_supported)
		return 0;

	entry = create_proc_entry("sysemu", 0600, &proc_root);
	if (entry != NULL) {
		entry->read_proc  = proc_read_sysemu;
		entry->write_proc = proc_write_sysemu;
	} else {
		printk(KERN_WARNING "Failed to register /proc/sysemu\n");
	}

	return 0;
}

late_initcall(make_proc_sysemu);

int singlestepping(void * t)
{
	struct task_struct *task = t ? t : current;

	if ( ! (task->ptrace & PT_DTRACE) )
J
Jeff Dike 已提交
437
		return 0;
L
Linus Torvalds 已提交
438 439

	if (task->thread.singlestep_syscall)
J
Jeff Dike 已提交
440
		return 1;
L
Linus Torvalds 已提交
441 442 443 444

	return 2;
}

445 446 447 448 449 450 451 452
/*
 * Only x86 and x86_64 have an arch_align_stack().
 * All other arches have "#define arch_align_stack(x) (x)"
 * in their asm/system.h
 * As this is included in UML from asm-um/system-generic.h,
 * we can use it to behave as the subarch does.
 */
#ifndef arch_align_stack
L
Linus Torvalds 已提交
453 454
unsigned long arch_align_stack(unsigned long sp)
{
J
Jeff Dike 已提交
455
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
L
Linus Torvalds 已提交
456 457 458
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
459
#endif
J
Jeff Dike 已提交
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494

/*
 * Find the address task p is waiting at.
 *
 * The kernel stack is scanned upwards from the stack pointer saved in the
 * task's switch_buf.  Words that point into scheduler functions are
 * skipped; the first kernel text address seen after at least one of them
 * is returned.
 *
 * Returns 0 if p is NULL, is the current task, is in TASK_RUNNING state,
 * has no kernel stack, has a saved stack pointer below its stack page, or
 * if no suitable address is found.
 */
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack_base, stack_end, cursor, word;
	bool past_sched = 0;

	if (p == NULL)
		return 0;
	if (p == current)
		return 0;
	if (p->state == TASK_RUNNING)
		return 0;

	stack_base = (unsigned long) task_stack_page(p);
	/* Bail if the process has no kernel stack for some reason */
	if (stack_base == 0)
		return 0;

	cursor = p->thread.switch_buf->JB_SP;
	/*
	 * Bail if the stack pointer is below the bottom of the kernel
	 * stack for some reason
	 */
	if (cursor < stack_base)
		return 0;

	stack_end = stack_base + THREAD_SIZE;
	for (; cursor < stack_end; cursor += sizeof(unsigned long)) {
		word = *((unsigned long *) cursor);
		if (in_sched_functions(word)) {
			/* Ignore everything until we're above the scheduler */
			past_sched = 1;
			continue;
		}
		if (kernel_text_address(word) && past_sched)
			return word;
	}

	return 0;
}