start_up.c 8.9 KB
Newer Older
1
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <stdio.h>
7
#include <stdlib.h>
J
Jeff Dike 已提交
8
#include <stdarg.h>
L
Linus Torvalds 已提交
9 10
#include <unistd.h>
#include <errno.h>
J
Jeff Dike 已提交
11 12 13 14
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
L
Linus Torvalds 已提交
15
#include <sys/mman.h>
J
Jeff Dike 已提交
16 17
#include <sys/stat.h>
#include <sys/wait.h>
18 19
#include <sys/time.h>
#include <sys/resource.h>
L
Linus Torvalds 已提交
20
#include <asm/unistd.h>
21 22 23 24 25 26
#include <init.h>
#include <os.h>
#include <mem_user.h>
#include <ptrace_user.h>
#include <registers.h>
#include <skas.h>
L
Linus Torvalds 已提交
27

28
/*
 * Body of the forked probe child; never returns.
 *
 * The child requests tracing with PTRACE_TRACEME, stops itself with SIGSTOP
 * and, once resumed, issues exactly one os_getpid() call for the tracer to
 * tamper with.  The exit status reports what that call returned:
 *   1 - the child's own pid (nothing was modified),
 *   0 - the value getppid() returned earlier (the tracer's modification
 *       took effect),
 *   2 - anything else.
 */
static void ptrace_child(void)
{
	/* Calling os_getpid because some libcs cached getpid incorrectly */
	int self = os_getpid();
	int parent = getppid();
	int seen;

	if (change_sig(SIGWINCH, 0) < 0 ||
	    ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
		perror("ptrace");
		kill(self, SIGKILL);
	}
	kill(self, SIGSTOP);

	/*
	 * This syscall will be intercepted by the parent. Don't call more than
	 * once, please.
	 */
	seen = os_getpid();

	/* Nothing modified by the parent, we are running normally. */
	if (seen == self)
		exit(1);

	/*
	 * Expected in check_ptrace and check_sysemu when they succeed
	 * in modifying the stack frame
	 */
	if (seen == parent)
		exit(0);

	/*
	 * Serious trouble! This could be caused by a bug in host 2.6
	 * SKAS3/2.6 patch before release -V6, together with a bug in
	 * the UML code itself.
	 */
	exit(2);
}

67
/*
 * Report a failed call and give up: prints str through perror(), which
 * appends the text for the current errno, then exits with status 1.
 */
static void fatal_perror(const char *str)
{
	perror(str);
	exit(1);
}

/*
 * Print a printf-style message to stderr and terminate the process with
 * exit status 1.  No newline is appended; the format string must carry its
 * own.  The format is only read, hence const.
 */
static void fatal(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(1);
}

/*
 * Print a printf-style message to stderr and return to the caller.  No
 * newline is appended; the format string must carry its own.  The format is
 * only read, hence const.
 */
static void non_fatal(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

J
Jeff Dike 已提交
93
/*
 * Fork a probe child running ptrace_child() and wait until it has stopped
 * itself.
 *
 * Returns the child's pid once waitpid() reports it stopped by SIGSTOP.
 * A failed fork or waitpid, or any other child state, terminates the
 * process through fatal_perror()/fatal().
 *
 * The diagnostics name this function, not check_ptrace, because it is also
 * called from check_sysemu and os_early_checks.
 */
static int start_ptraced_child(void)
{
	int pid, n, status;

	fflush(stdout);

	pid = fork();
	if (pid == 0)
		ptrace_child();
	else if (pid < 0)
		fatal_perror("start_ptraced_child : fork failed");

	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
	if (n < 0)
		fatal_perror("start_ptraced_child : waitpid failed");
	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
		fatal("start_ptraced_child : expected SIGSTOP, "
		      "got status = %d\n", status);

	return pid;
}

115 116 117
/*
 * When testing for SYSEMU support, if it is one of the broken versions, we
 * must just avoid using sysemu, not exit, but only if SYSEMU features are
 * broken.
 * So only the SYSEMU checks pass mustexit == 0, while normal host features
 * must work anyway!
 *
 * Resumes the stopped child pid with PTRACE_CONT and waits for it to finish.
 *
 * Returns 0 when the child exited normally with status exitcode, and -1
 * otherwise.  With mustexit nonzero, a failed waitpid or an unexpected child
 * status ends the process with exit(1) instead of returning -1.  A failed
 * PTRACE_CONT always returns -1.
 */
static int stop_ptraced_child(int pid, int exitcode, int mustexit)
{
	int status, n, ret = 0;

	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
		perror("stop_ptraced_child : ptrace failed");
		return -1;
	}
	CATCH_EINTR(n = waitpid(pid, &status, 0));
	if (n < 0) {
		/* status was never filled in, so it must not be interpreted. */
		perror("stop_ptraced_child : waitpid failed");
		if (mustexit)
			exit(1);
		return -1;
	}
	if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
		int exit_with = WEXITSTATUS(status);
		if (exit_with == 2)
			non_fatal("check_ptrace : child exited with status 2. "
				  "\nDisabling SYSEMU support.\n");
		non_fatal("check_ptrace : child exited with exitcode %d, while "
			  "expecting %d; status 0x%x\n", exit_with,
			  exitcode, status);
		if (mustexit)
			exit(1);
		ret = -1;
	}

	return ret;
}

146
/* Changed only during early boot */
147 148
static int force_sysemu_disabled = 0;

L
Linus Torvalds 已提交
149 150 151 152 153 154 155
static int __init nosysemu_cmd_param(char *str, int* add)
{
	force_sysemu_disabled = 1;
	return 0;
}

__uml_setup("nosysemu", nosysemu_cmd_param,
156 157 158 159 160 161 162
"nosysemu\n"
"    Turns off syscall emulation patch for ptrace (SYSEMU) on.\n"
"    SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
"    behaviour of ptrace() and helps reducing host context switch rate.\n"
"    To make it working, you need a kernel patch for your host, too.\n"
"    See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
"    information.\n\n");
L
Linus Torvalds 已提交
163 164 165

/*
 * Probe the host for PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP.
 *
 * Stage 1 resumes a probe child with PTRACE_SYSEMU, expects it to stop with
 * SIGTRAP at its getpid system call, writes this process's pid into the
 * child's system call return slot and lets the child run to completion.  On
 * success sysemu_supported becomes 1.
 *
 * Stage 2 repeats the exercise with PTRACE_SYSEMU_SINGLESTEP and
 * PTRACE_O_TRACESYSGOOD, stepping until the (SIGTRAP | 0x80) system call
 * stop.  On success sysemu_supported becomes 2.
 *
 * Any recoverable failure prints "missing" and returns, leaving
 * sysemu_supported at the last level that passed.
 */
static void __init check_sysemu(void)
{
	unsigned long regs[MAX_REG_NR];
	int child, rc, wstatus, stopsig;
	int steps = 0;

	os_info("Checking syscall emulation patch for ptrace...");
	sysemu_supported = 0;
	child = start_ptraced_child();

	if (ptrace(PTRACE_SYSEMU, child, 0, 0) < 0)
		goto fail;

	CATCH_EINTR(rc = waitpid(child, &wstatus, WUNTRACED));
	if (rc < 0)
		fatal_perror("check_sysemu : wait failed");
	if (!(WIFSTOPPED(wstatus) && WSTOPSIG(wstatus) == SIGTRAP))
		fatal("check_sysemu : expected SIGTRAP, got status = %d\n",
		      wstatus);

	if (ptrace(PTRACE_GETREGS, child, 0, regs) < 0)
		fatal_perror("check_sysemu : PTRACE_GETREGS failed");
	if (PT_SYSCALL_NR(regs) != __NR_getpid) {
		non_fatal("check_sysemu got system call number %d, "
			  "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
		goto fail;
	}

	/* Hand the child our own pid as the result of its getpid call. */
	if (ptrace(PTRACE_POKEUSER, child, PT_SYSCALL_RET_OFFSET,
		   os_getpid()) < 0) {
		non_fatal("check_sysemu : failed to modify system call "
			  "return");
		goto fail;
	}

	if (stop_ptraced_child(child, 0, 0) < 0)
		goto fail_stopped;

	sysemu_supported = 1;
	os_info("OK\n");
	set_using_sysemu(!force_sysemu_disabled);

	os_info("Checking advanced syscall emulation patch for ptrace...");
	child = start_ptraced_child();

	if (ptrace(PTRACE_OLDSETOPTIONS, child, 0,
		   (void *) PTRACE_O_TRACESYSGOOD) < 0)
		fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed");

	for (;;) {
		steps++;
		if (ptrace(PTRACE_SYSEMU_SINGLESTEP, child, 0, 0) < 0)
			goto fail;
		CATCH_EINTR(rc = waitpid(child, &wstatus, WUNTRACED));
		if (rc < 0)
			fatal_perror("check_sysemu: wait failed");

		/* -1 can never equal a real stop signal. */
		stopsig = WIFSTOPPED(wstatus) ? WSTOPSIG(wstatus) : -1;

		if (stopsig == SIGTRAP) {
			/* Plain single-step stop: count it and keep going. */
			steps++;
			continue;
		}

		if (stopsig != (SIGTRAP | 0x80)) {
			non_fatal("check_sysemu: expected SIGTRAP or "
				  "(SIGTRAP | 0x80), got status = %d\n",
				  wstatus);
			goto fail;
		}

		/*
		 * System call stop reached.
		 * NOTE(review): steps is incremented at the top of every
		 * iteration, so it is at least 1 here and this check cannot
		 * currently fire.
		 */
		if (!steps) {
			non_fatal("check_sysemu: SYSEMU_SINGLESTEP "
				  "doesn't singlestep");
			goto fail;
		}

		if (ptrace(PTRACE_POKEUSER, child, PT_SYSCALL_RET_OFFSET,
			   os_getpid()) < 0)
			fatal_perror("check_sysemu : failed to modify "
				     "system call return");
		break;
	}

	if (stop_ptraced_child(child, 0, 0) < 0)
		goto fail_stopped;

	sysemu_supported = 2;
	os_info("OK\n");

	if (!force_sysemu_disabled)
		set_using_sysemu(sysemu_supported);
	return;

fail:
	/* The child is still alive here; let it finish before reporting. */
	stop_ptraced_child(child, 1, 0);
fail_stopped:
	non_fatal("missing\n");
}

259
/*
 * Verify that the host's ptrace can rewrite a system call number.
 *
 * A probe child is traced with PTRACE_SYSCALL (with PTRACE_O_TRACESYSGOOD
 * set) until it stops with getpid in its system call number slot; that
 * number is then replaced with __NR_getppid.  The child is expected to exit
 * with status 0; stop_ptraced_child() is called with mustexit set, so a
 * mismatch ends the process.  Any ptrace or wait failure along the way is
 * fatal too.  On success, continues with check_sysemu().
 */
static void __init check_ptrace(void)
{
	int child, nr, rc, wstatus;

	os_info("Checking that ptrace can change system call numbers...");
	child = start_ptraced_child();

	if (ptrace(PTRACE_OLDSETOPTIONS, child, 0,
		   (void *) PTRACE_O_TRACESYSGOOD) < 0)
		fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");

	for (;;) {
		if (ptrace(PTRACE_SYSCALL, child, 0, 0) < 0)
			fatal_perror("check_ptrace : ptrace failed");

		CATCH_EINTR(rc = waitpid(child, &wstatus, WUNTRACED));
		if (rc < 0)
			fatal_perror("check_ptrace : wait failed");

		if (!(WIFSTOPPED(wstatus) &&
		      WSTOPSIG(wstatus) == (SIGTRAP | 0x80)))
			fatal("check_ptrace : expected (SIGTRAP|0x80), "
			      "got status = %d", wstatus);

		nr = ptrace(PTRACE_PEEKUSER, child, PT_SYSCALL_NR_OFFSET, 0);
		if (nr != __NR_getpid)
			continue;

		/* Found the child's getpid call: turn it into getppid. */
		if (ptrace(PTRACE_POKEUSER, child, PT_SYSCALL_NR_OFFSET,
			   __NR_getppid) < 0)
			fatal_perror("check_ptrace : failed to modify "
				     "system call");
		break;
	}

	stop_ptraced_child(child, 0, 1);
	os_info("OK\n");
	check_sysemu();
}

299
extern void check_tmpexec(void);
300

J
Jeff Dike 已提交
301
/*
 * Print the soft and hard RLIMIT_CORE values through os_info().  A limit
 * equal to RLIM_INFINITY is shown as "NONE".  If getrlimit() fails, the
 * error is reported with perror() and nothing else is printed.
 */
static void __init check_coredump_limit(void)
{
	struct rlimit core;

	if (getrlimit(RLIMIT_CORE, &core)) {
		perror("Getting core dump limit");
		return;
	}

	os_info("Core dump limits :\n\tsoft - ");
	if (core.rlim_cur != RLIM_INFINITY)
		os_info("%llu\n", (unsigned long long)core.rlim_cur);
	else
		os_info("NONE\n");

	os_info("\thard - ");
	if (core.rlim_max != RLIM_INFINITY)
		os_info("%llu\n", (unsigned long long)core.rlim_max);
	else
		os_info("NONE\n");
}

J
Jeff Dike 已提交
324
/*
 * Run the early host checks, in order: report the core dump limits, run
 * the ptrace checks, run check_tmpexec(), and finally start one more probe
 * child to pass to init_registers().
 */
void __init os_early_checks(void)
{
	int probe;

	/* Print out the core dump limits early */
	check_coredump_limit();

	check_ptrace();

	/*
	 * Need to check this early because mmapping happens before the
	 * kernel is running.
	 */
	check_tmpexec();

	probe = start_ptraced_child();
	if (init_registers(probe))
		fatal("Failed to initialize default registers");

	/*
	 * The child is expected to exit with status 1 here; mustexit is set,
	 * so any other outcome ends the process.
	 */
	stop_ptraced_child(probe, 1, 1);
}

344 345 346
/*
 * Parse an iomem specification of the form "<driver>,<file>".
 *
 * str is split in place at the first comma.  The file is opened read/write
 * and a new iomem_region for it is pushed onto the head of iomem_regions;
 * iomem_size grows by the region's size plus one UM_KERN_PAGE_SIZE.  The
 * region keeps a pointer into str for the driver name, and the descriptor
 * stays open.  add is not used.
 *
 * Returns 0 on success.  Returns 1 if the comma is missing or if open,
 * fstat64 or malloc fails; the descriptor is closed on the latter two.
 */
int __init parse_iomem(char *str, int *add)
{
	struct iomem_region *region;
	struct stat64 st;
	char *comma, *path;
	int fd, size;

	comma = strchr(str,',');
	if (comma == NULL) {
		os_warn("parse_iomem : failed to parse iomem\n");
		return 1;
	}

	/* Terminate the driver name; the file name follows the comma. */
	*comma = '\0';
	path = comma + 1;

	fd = open(path, O_RDWR, 0);
	if (fd < 0) {
		perror("parse_iomem - Couldn't open io file");
		return 1;
	}

	if (fstat64(fd, &st) < 0) {
		perror("parse_iomem - cannot stat_fd file");
		goto out_close;
	}

	region = malloc(sizeof(*region));
	if (region == NULL) {
		perror("Couldn't allocate iomem_region struct");
		goto out_close;
	}

	/*
	 * NOTE(review): this adds a whole page before masking, not
	 * (page - 1), so assuming UM_KERN_PAGE_SIZE is a power of two, a
	 * file whose size is already a page multiple gets one extra page.
	 * Confirm whether that is intended.
	 */
	size = (st.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1);

	*region = ((struct iomem_region) { .next	= iomem_regions,
					   .driver	= str,
					   .fd		= fd,
					   .size	= size,
					   .phys	= 0,
					   .virt	= 0 });
	iomem_regions = region;
	iomem_size += region->size + UM_KERN_PAGE_SIZE;

	return 0;

 out_close:
	close(fd);
	return 1;
}