setup.c 27.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4
/*
 *  arch/s390/kernel/setup.c
 *
 *  S390 version
5
 *    Copyright (C) IBM Corp. 1999,2010
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15 16
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

17 18 19
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
38
#include <linux/device.h>
39
#include <linux/notifier.h>
40
#include <linux/pfn.h>
H
Hongjie Yang 已提交
41
#include <linux/ctype.h>
42
#include <linux/reboot.h>
43
#include <linux/topology.h>
44
#include <linux/ftrace.h>
M
Michael Holzheu 已提交
45 46 47
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
L
Linus Torvalds 已提交
48

M
Michael Holzheu 已提交
49
#include <asm/ipl.h>
L
Linus Torvalds 已提交
50 51 52 53 54 55 56
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
57 58
#include <asm/page.h>
#include <asm/ptrace.h>
59
#include <asm/sections.h>
H
Hongjie Yang 已提交
60
#include <asm/ebcdic.h>
G
Gerald Schaefer 已提交
61
#include <asm/compat.h>
62
#include <asm/kvm_virtio.h>
M
Michael Holzheu 已提交
63
#include <asm/diag.h>
G
Gerald Schaefer 已提交
64 65 66 67 68 69

long psw_kernel_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
			   PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
long psw_user_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
			   PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
			   PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
L
Linus Torvalds 已提交
70

71 72 73 74
/*
 * User copy operations.
 */
struct uaccess_ops uaccess;
75
EXPORT_SYMBOL(uaccess);
76

L
Linus Torvalds 已提交
77 78 79 80
/*
 * Machine setup..
 */
unsigned int console_mode = 0;
81 82
EXPORT_SYMBOL(console_mode);

L
Linus Torvalds 已提交
83
unsigned int console_devno = -1;
84 85
EXPORT_SYMBOL(console_devno);

L
Linus Torvalds 已提交
86
unsigned int console_irq = -1;
87 88
EXPORT_SYMBOL(console_irq);

89 90
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
91

92
struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
93 94 95

int __initdata memory_end_set;
unsigned long __initdata memory_end;
L
Linus Torvalds 已提交
96

F
Frank Munzert 已提交
97 98 99 100
/* An array with a pointer to the lowcore of every CPU. */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

L
Linus Torvalds 已提交
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
/*
 * This is set up by the setup routine at boot time.
 * On S390 we need to find out what we have to set up
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

/*
 * Handler for the "condev=" kernel parameter.
 *
 * @str: parameter value; parsed as a number with automatic base detection.
 *
 * A value in the range 0..65535 becomes the new console_devno and resets
 * console_irq to -1; any other value is ignored. Always returns 1.
 */
static int __init condev_setup(char *str)
{
	int devno = simple_strtoul(str, &str, 0);

	/* Reject everything outside the 16-bit range. */
	if (devno < 0 || devno >= 65536)
		return 1;

	console_devno = devno;
	console_irq = -1;
	return 1;
}

__setup("condev=", condev_setup);

127 128
/*
 * Register the preferred console that matches the machine type or the
 * selected console mode. Nothing is registered if none of the checks
 * below applies.
 */
static void __init set_preferred_console(void)
{
	char *name;

	if (MACHINE_IS_KVM)
		name = "hvc";
	else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		name = "ttyS";
	else if (CONSOLE_IS_3270)
		name = "tty3270";
	else
		return;

	add_preferred_console(name, 0, NULL);
}

L
Linus Torvalds 已提交
137 138
/*
 * Handler for the "conmode=" kernel parameter.
 *
 * @str: requested console mode; "hwc", "sclp", "3215" and "3270" are
 *       recognized, each only if the matching console support is
 *       configured. The match has to be exact.
 *
 * Afterwards the preferred console is (re)registered. Always returns 1.
 */
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

/*
 * Select the default console mode.
 *
 * When running under VM, the console device number and subchannel are
 * taken from the response to the CP command "QUERY CONSOLE", and the
 * console mode from the "CONMODE" field of the "QUERY TERM" response.
 * In all other cases the SCLP console is selected if it is configured.
 */
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (!MACHINE_IS_VM) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
		return;
	}

	cpcmd("QUERY CONSOLE", query_buffer, sizeof(query_buffer), NULL);
	console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
	ptr = strstr(query_buffer, "SUBCHANNEL =");
	/*
	 * The response may lack the subchannel field; in that case leave
	 * console_irq untouched instead of dereferencing a NULL pointer.
	 */
	if (ptr != NULL)
		console_irq = simple_strtoul(ptr + 13, NULL, 16);

	cpcmd("QUERY TERM", query_buffer, sizeof(query_buffer), NULL);
	ptr = strstr(query_buffer, "CONMODE");
	/*
	 * Set the conmode to 3215 so that the device recognition
	 * will set the cu_type of the console to 3215. If the
	 * conmode is 3270 and we don't set it back then both
	 * 3215 and the 3270 driver will try to access the console
	 * device (3215 as console and 3270 as normal tty).
	 */
	cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
	if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
		return;
	}

	/* Prefer the reported mode, then fall back to what is configured. */
	if (strncmp(ptr + 8, "3270", 4) == 0) {
#if defined(CONFIG_TN3270_CONSOLE)
		SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
		SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	} else if (strncmp(ptr + 8, "3215", 4) == 0) {
#if defined(CONFIG_TN3215_CONSOLE)
		SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
		SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

207
#ifdef CONFIG_ZFCPDUMP
M
Michael Holzheu 已提交
208 209
/*
 * Prepare the kernel command line for an FCP dump IPL.
 *
 * @console_devno: console device number, or -1 if there is none.
 *
 * Does nothing unless the IPL type is IPL_TYPE_FCP_DUMP. Otherwise a
 * "cio_ignore=" option that ignores all devices except the FCP dump
 * device (and the console, if given) is appended to boot_command_line
 * and console_loglevel is set to 2.
 */
static void __init setup_zfcpdump(unsigned int console_devno)
{
	static char str[41];

	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
		return;
	if (console_devno != -1)
		snprintf(str, sizeof(str),
			 " cio_ignore=all,!0.0.%04x,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno, console_devno);
	else
		snprintf(str, sizeof(str), " cio_ignore=all,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno);
	/* Never write beyond the end of the command line buffer. */
	strlcat(boot_command_line, str, COMMAND_LINE_SIZE);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(unsigned int console_devno) {}
#endif /* CONFIG_ZFCPDUMP */

L
Linus Torvalds 已提交
227 228 229 230 231 232 233
/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
234
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
235 236 237 238 239
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
240 241 242 243 244
	_machine_restart(command);
}

void machine_halt(void)
{
245 246 247 248 249 250
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
251 252 253 254 255
	_machine_halt();
}

void machine_power_off(void)
{
256 257 258 259 260 261
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
262 263 264
	_machine_power_off();
}

265 266 267 268 269
/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;

270 271 272
/*
 * Handler for the "mem=" early parameter.
 *
 * @p: parameter value, or NULL if "mem" was given without a value.
 *
 * Stores the parsed size in memory_end and records in memory_end_set
 * that a limit was given. Returns 0 on success and -EINVAL if no value
 * was supplied.
 */
static int __init early_parse_mem(char *p)
{
	/* memparse() must not be handed a NULL string. */
	if (!p)
		return -EINVAL;
	memory_end = memparse(p, &p);
	memory_end_set = 1;
	return 0;
}
early_param("mem", early_parse_mem);

278 279
unsigned int user_mode = HOME_SPACE_MODE;
EXPORT_SYMBOL_GPL(user_mode);
G
Gerald Schaefer 已提交
280

281 282
static int set_amode_and_uaccess(unsigned long user_amode,
				 unsigned long user32_amode)
G
Gerald Schaefer 已提交
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
{
	psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode |
			PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
			PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
#ifdef CONFIG_COMPAT
	psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode |
			  PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
			  PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
	psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode |
			  PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
			  PSW32_MASK_PSTATE;
#endif
	psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
			  PSW_MASK_MCHECK | PSW_DEFAULT_KEY;

	if (MACHINE_HAS_MVCOS) {
		memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
300
		return 1;
G
Gerald Schaefer 已提交
301 302
	} else {
		memcpy(&uaccess, &uaccess_pt, sizeof(uaccess));
303
		return 0;
G
Gerald Schaefer 已提交
304 305 306 307 308 309 310 311
	}
}

/*
 * Handler for the "switch_amode" early parameter: selects
 * PRIMARY_SPACE_MODE as user_mode. The parameter value is ignored.
 * Always returns 0.
 */
static int __init early_parse_switch_amode(char *p)
{
	user_mode = PRIMARY_SPACE_MODE;

	return 0;
}
early_param("switch_amode", early_parse_switch_amode);

317
/*
 * Handler for the "user_mode=" early parameter.
 *
 * @p: "primary" or "home"; a missing value is treated like "home".
 *
 * Returns 0 if user_mode was set and 1 for an unknown value.
 */
static int __init early_parse_user_mode(char *p)
{
	if (!p || strcmp(p, "home") == 0) {
		user_mode = HOME_SPACE_MODE;
		return 0;
	}
	if (strcmp(p, "primary") == 0) {
		user_mode = PRIMARY_SPACE_MODE;
		return 0;
	}
	return 1;
}
327
early_param("user_mode", early_parse_user_mode);
G
Gerald Schaefer 已提交
328 329 330

static void setup_addressing_mode(void)
{
331
	if (user_mode == PRIMARY_SPACE_MODE) {
332 333 334 335 336 337
		if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY))
			pr_info("Address spaces switched, "
				"mvcos available\n");
		else
			pr_info("Address spaces switched, "
				"mvcos not available\n");
G
Gerald Schaefer 已提交
338 339 340
	}
}

341 342 343 344 345 346 347 348
/*
 * Allocate and initialize the lowcore for the boot cpu, install it with
 * set_prefix() and record it in lowcore_ptr[0].
 */
static void __init setup_lowcore(void)
{
	struct _lowcore *lc;

	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);

	/* Restart PSW */
	lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
	if (user_mode != HOME_SPACE_MODE)
		lc->restart_psw.mask |= PSW_ASC_HOME;
	lc->restart_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;

	/* New PSWs for the interruption classes */
	lc->external_new_psw.mask = psw_kernel_bits;
	lc->external_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
	lc->program_new_psw.mask = psw_kernel_bits;
	lc->program_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask =
		psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT;
	lc->mcck_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = psw_kernel_bits;
	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;

	lc->clock_comparator = -1ULL;

	/* Stacks; each pointer is set to the end of its area */
	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
	lc->async_stack = (unsigned long)
		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
	lc->panic_stack = (unsigned long)
		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;

	/* Initial task and thread_info */
	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
	lc->thread_info = (unsigned long) &init_thread_union;

	/* Machine flags and facility lists are copied from the current lowcore */
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       MAX_FACILITY_BIT/8);
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE) {
		lc->extended_save_area_addr = (__u32)
			__alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
		/* enable extended save area */
		__ctl_set_bit(14, 29);
	}
#else
	lc->cmf_hpp = -1ULL;
	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif

	/* Timer values are copied from the current lowcore as well */
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;
	lc->ftrace_func = S390_lowcore.ftrace_func;

	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

406 407 408 409 410 411 412 413 414 415
static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

416 417 418 419 420
static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

421 422 423
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
424
	&bss_resource,
425 426 427
};

/*
 * Register the memory chunks and the kernel image with the iomem
 * resource tree.
 *
 * Every non-empty memory chunk, except those of type CHUNK_OLDMEM and
 * CHUNK_CRASHK, is requested as a child of iomem_resource. The kernel
 * code, data and bss resources are then requested as children of the
 * chunk in which they start; a resource that extends beyond the end of
 * that chunk is split at the chunk boundary.
 */
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	int i, j;

	code_resource.start = (unsigned long) &_text;
	code_resource.end = (unsigned long) &_etext - 1;
	data_resource.start = (unsigned long) &_etext;
	data_resource.end = (unsigned long) &_edata - 1;
	bss_resource.start = (unsigned long) &__bss_start;
	bss_resource.end = (unsigned long) &__bss_stop - 1;

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		if (!memory_chunk[i].size)
			continue;
		if (memory_chunk[i].type == CHUNK_OLDMEM ||
		    memory_chunk[i].type == CHUNK_CRASHK)
			continue;
		res = alloc_bootmem_low(sizeof(*res));
		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
		/* CHUNK_CRASHK cannot show up here, it was skipped above. */
		switch (memory_chunk[i].type) {
		case CHUNK_READ_WRITE:
			res->name = "System RAM";
			break;
		case CHUNK_READ_ONLY:
			res->name = "System ROM";
			res->flags |= IORESOURCE_READONLY;
			break;
		default:
			res->name = "reserved";
			break;
		}
		res->start = memory_chunk[i].addr;
		res->end = res->start + memory_chunk[i].size - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			/* Only handle resources that start in this chunk. */
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				/*
				 * Register the part inside this chunk as a
				 * copy and keep the rest for a later chunk.
				 */
				sub_res = alloc_bootmem_low(sizeof(*sub_res));
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
}

M
Michael Holzheu 已提交
481 482 483
unsigned long real_memory_size;
EXPORT_SYMBOL_GPL(real_memory_size);

H
Heiko Carstens 已提交
484 485
/*
 * Determine the end of usable memory and trim the memory chunk array.
 *
 * memory_end is page aligned and limited to VMEM_MAX_PHYS; for an FCP
 * dump IPL (CONFIG_ZFCPDUMP) it is forced to ZFCPDUMP_HSA_SIZE. All
 * chunks are aligned, chunks beyond the limit are cleared or shortened,
 * and real_memory_size is raised to the highest chunk end seen before
 * clipping. If no memory_end was given, it is set to the highest
 * remaining chunk end.
 */
static void __init setup_memory_end(void)
{
	const unsigned long align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
	unsigned long memory_size = 0;
	unsigned long max_mem;
	struct mem_chunk *chunk;

#ifdef CONFIG_ZFCPDUMP
	if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
		memory_end = ZFCPDUMP_HSA_SIZE;
		memory_end_set = 1;
	}
#endif
	memory_end &= PAGE_MASK;

	max_mem = memory_end ? min(VMEM_MAX_PHYS, memory_end) : VMEM_MAX_PHYS;
	memory_end = min(max_mem, memory_end);

	/*
	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
	 * extra checks that HOLES_IN_ZONE would require.
	 */
	for (chunk = memory_chunk; chunk < memory_chunk + MEMORY_CHUNKS;
	     chunk++) {
		unsigned long start, end;

		start = (chunk->addr + align - 1) & ~(align - 1);
		end = (chunk->addr + chunk->size) & ~(align - 1);
		if (start < end) {
			chunk->addr = start;
			chunk->size = end - start;
		} else {
			memset(chunk, 0, sizeof(*chunk));
		}
	}

	/* Drop or shorten everything that lies above max_mem. */
	for (chunk = memory_chunk; chunk < memory_chunk + MEMORY_CHUNKS;
	     chunk++) {
		real_memory_size = max(real_memory_size,
				       chunk->addr + chunk->size);
		if (chunk->addr >= max_mem) {
			memset(chunk, 0, sizeof(*chunk));
			continue;
		}
		if (chunk->addr + chunk->size > max_mem)
			chunk->size = max_mem - chunk->addr;
		memory_size = max(memory_size, chunk->addr + chunk->size);
	}
	if (!memory_end)
		memory_end = memory_size;
}

541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561
void *restart_stack __attribute__((__section__(".data")));

/*
 * Set up the new PSW and allocate the stack for the PSW restart interrupt
 */
static void __init setup_restart_psw(void)
{
	psw_t restart_psw;

	/* restart_stack is set to the end of the allocated area */
	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
	restart_stack += ASYNC_SIZE;

	/*
	 * Set up the restart PSW for the absolute zero lowcore. This is
	 * necessary if PSW restart is done on an offline CPU that has
	 * lowcore zero.
	 */
	restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
	restart_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
	copy_to_absolute_zero(&S390_lowcore.restart_psw, &restart_psw,
			      sizeof(restart_psw));
}

M
Michael Holzheu 已提交
562 563 564 565 566 567 568 569 570
/*
 * With CONFIG_KEXEC, store the value returned by paddr_vmcoreinfo_note()
 * in the vmcore_info field of the absolute zero lowcore. Without it this
 * function does nothing.
 */
static void __init setup_vmcoreinfo(void)
{
#ifdef CONFIG_KEXEC
	unsigned long note_addr;

	note_addr = paddr_vmcoreinfo_note();
	copy_to_absolute_zero(&S390_lowcore.vmcore_info, &note_addr,
			      sizeof(note_addr));
#endif
}

M
Michael Holzheu 已提交
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
#ifdef CONFIG_CRASH_DUMP

/*
 * Find a suitable location for the crashkernel memory.
 *
 * @crash_size: size of the area to be reserved
 * @msg:        set to an error description if 0 is returned
 *
 * The memory chunks are searched from the last to the first one; the
 * area is placed at the end of the first read-write chunk that is large
 * enough and leaves the base above crash_size, ZFCPDUMP_HSA_SIZE_MAX and
 * the end of the initrd. In a kdump kernel OLDMEM_BASE is returned if
 * crash_size equals OLDMEM_SIZE.
 *
 * Returns the base address or 0 if no location was found.
 */
static unsigned long __init find_crash_base(unsigned long crash_size,
					    char **msg)
{
	int idx;

	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return 0;
	}
	if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
		return OLDMEM_BASE;

	for (idx = MEMORY_CHUNKS - 1; idx >= 0; idx--) {
		struct mem_chunk *chunk = &memory_chunk[idx];
		unsigned long base;

		if (chunk->size == 0 || chunk->type != CHUNK_READ_WRITE ||
		    chunk->size < crash_size)
			continue;
		base = (chunk->addr + chunk->size) - crash_size;
		if (base >= crash_size &&
		    base >= ZFCPDUMP_HSA_SIZE_MAX &&
		    base >= (unsigned long) INITRD_START + INITRD_SIZE)
			return base;
	}
	*msg = "no suitable area found";
	return 0;
}

/*
 * Check if crash_base and crash_size are valid.
 *
 * @crash_base: start address of the crashkernel area
 * @crash_size: size of the crashkernel area
 * @msg:        set to an error description on failure
 *
 * Returns 0 if the area is acceptable and -EINVAL otherwise.
 */
static int __init verify_crash_base(unsigned long crash_base,
				    unsigned long crash_size,
				    char **msg)
{
	int idx;

	/*
	 * Because we do the swap to zero, we must have at least 'crash_size'
	 * bytes free space before crash_base
	 */
	if (crash_base < crash_size) {
		*msg = "crashkernel offset must be greater than size";
		return -EINVAL;
	}

	/* First memory chunk must be at least crash_size */
	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return -EINVAL;
	}

	/* Look for the memory chunk that contains crash_base */
	for (idx = 0; idx < MEMORY_CHUNKS; idx++) {
		struct mem_chunk *chunk = &memory_chunk[idx];
		unsigned long chunk_end = chunk->addr + chunk->size;

		if (chunk->size == 0)
			continue;
		if (crash_base < chunk->addr || crash_base >= chunk_end)
			continue;
		/* Found it: the whole area has to fit into this chunk */
		if (crash_base + crash_size <= chunk_end)
			return 0;
		*msg = "selected memory chunk is too small for "
			"crashkernel memory";
		return -EINVAL;
	}
	*msg = "invalid memory range specified";
	return -EINVAL;
}

/*
 * Reserve kdump memory by creating a memory hole of the given type in
 * the mem_chunk array.
 *
 * @addr: start address of the hole
 * @size: size of the hole
 * @type: chunk type handed to create_mem_hole()
 */
static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
					 int type)
{
	create_mem_hole(memory_chunk, addr, size, type);
}

/*
 * When kdump is enabled, we have to ensure that no memory from
 * the area [0 - crashkernel memory size] and
 * [crashk_res.start - crashk_res.end] is set offline.
 *
 * Returns NOTIFY_BAD if the page range described by @data starts in the
 * first area or overlaps the second one, NOTIFY_OK otherwise. The
 * @action value is not looked at.
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;
	unsigned long first_pfn = arg->start_pfn;
	unsigned long last_pfn = arg->start_pfn + arg->nr_pages - 1;

	if (first_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	if (first_pfn > PFN_DOWN(crashk_res.end) ||
	    last_pfn < PFN_DOWN(crashk_res.start))
		return NOTIFY_OK;
	return NOTIFY_BAD;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that oldmem, where the dump is stored, is protected
 */
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (!OLDMEM_BASE)
		return;

	reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
	reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
			      CHUNK_OLDMEM);
	if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
	else
		saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec.
 *
 * The size and optional base are parsed from the kernel command line and
 * aligned to KEXEC_CRASH_MEM_ALIGN. If no base was given, one is
 * searched for. A valid area is recorded in crashk_res, inserted into
 * the iomem resource tree and turned into a CHUNK_CRASHK memory hole.
 * Does nothing without CONFIG_CRASH_DUMP.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	char *msg;
	int rc;

	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
			       &crash_base);
	if (rc || crash_size == 0)
		return;
	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!crash_base)
		crash_base = find_crash_base(crash_size, &msg);
	if (!crash_base || verify_crash_base(crash_base, crash_size, &msg))
		goto out_failed;

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
	reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20, memory_end >> 20);
	return;

out_failed:
	/* Both failure cases report msg and drop the memory notifier again */
	pr_info("crashkernel reservation failed: %s\n", msg);
	unregister_memory_notifier(&kdump_mem_nb);
#endif
}

752 753 754 755
/*
 * Set up the boot-time memory allocator.
 *
 * The steps are, in order: move the initrd if the bootmem bitmap would
 * overlap it, initialize bootmem, register the read-write and crashkernel
 * memory chunks as active ranges and set their storage keys, free the
 * active regions, and finally reserve the ranges that are already in use
 * (start of memory up to _ehead, the kernel image, the bootmem bitmap,
 * and depending on the configuration the crashkernel area, the ELF core
 * header and the initrd).
 */
static void __init
setup_memory(void)
{
	unsigned long bootmap_size;
	unsigned long start_pfn, end_pfn;
	int i;

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	start_pfn = PFN_UP(__pa(&_end));
	end_pfn = max_pfn = PFN_DOWN(memory_end);

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Move the initrd in case the bitmap of the bootmem allocator
	 * would overwrite it.
	 */

	if (INITRD_START && INITRD_SIZE) {
		unsigned long bmap_size;
		unsigned long start;

		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
		bmap_size = PFN_PHYS(bmap_size);

		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;

#ifdef CONFIG_CRASH_DUMP
			if (OLDMEM_BASE) {
				/* Move initrd behind kdump oldmem */
				if (start + INITRD_SIZE > OLDMEM_BASE &&
				    start < OLDMEM_BASE + OLDMEM_SIZE)
					start = OLDMEM_BASE + OLDMEM_SIZE;
			}
#endif
			if (start + INITRD_SIZE > memory_end) {
				pr_err("initrd extends beyond end of "
				       "memory (0x%08lx > 0x%08lx) "
				       "disabling initrd\n",
				       start + INITRD_SIZE, memory_end);
				INITRD_START = INITRD_SIZE = 0;
			} else {
				pr_info("Moving initrd (0x%08lx -> "
					"0x%08lx, size: %ld)\n",
					INITRD_START, start, INITRD_SIZE);
				memmove((void *) start, (void *) INITRD_START,
					INITRD_SIZE);
				INITRD_START = start;
			}
		}
	}
#endif

	/*
	 * Initialize the boot-time allocator
	 */
	bootmap_size = init_bootmem(start_pfn, end_pfn);

	/*
	 * Register RAM areas with the bootmem allocator.
	 */

	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		unsigned long start_chunk, end_chunk, pfn;

		if (memory_chunk[i].type != CHUNK_READ_WRITE &&
		    memory_chunk[i].type != CHUNK_CRASHK)
			continue;
		start_chunk = PFN_DOWN(memory_chunk[i].addr);
		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
		end_chunk = min(end_chunk, end_pfn);
		if (start_chunk >= end_chunk)
			continue;
		add_active_range(0, start_chunk, end_chunk);
		pfn = max(start_chunk, start_pfn);
		for (; pfn < end_chunk; pfn++)
			page_set_storage_key(PFN_PHYS(pfn),
					     PAGE_DEFAULT_KEY, 0);
	}

	psw_set_key(PAGE_DEFAULT_KEY);

	free_bootmem_with_active_regions(0, max_pfn);

	/*
	 * Reserve memory used for lowcore/command line/kernel image.
	 */
	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
	reserve_bootmem((unsigned long)_stext,
			PFN_PHYS(start_pfn) - (unsigned long)_stext,
			BOOTMEM_DEFAULT);
	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
			BOOTMEM_DEFAULT);

#ifdef CONFIG_CRASH_DUMP
	if (crashk_res.start)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);
	if (is_kdump_kernel())
		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
	if (INITRD_START && INITRD_SIZE) {
		if (INITRD_START + INITRD_SIZE <= memory_end) {
			reserve_bootmem(INITRD_START, INITRD_SIZE,
					BOOTMEM_DEFAULT);
			initrd_start = INITRD_START;
			initrd_end = initrd_start + INITRD_SIZE;
		} else {
			/*
			 * Report the real end of the initrd; initrd_start
			 * has not been assigned on this path.
			 */
			pr_err("initrd extends beyond end of "
			       "memory (0x%08lx > 0x%08lx) "
			       "disabling initrd\n",
			       INITRD_START + INITRD_SIZE, memory_end);
			initrd_start = initrd_end = 0;
		}
	}
#endif
}
L
Linus Torvalds 已提交
881

882 883 884 885 886 887
/*
 * Setup hardware capabilities: fills in elf_hwcap from the installed
 * facilities and elf_platform from the machine type of the cpu id.
 */
static void __init setup_hwcaps(void)
{
	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
	struct cpuid cpu_id;
	int i;

	/*
	 * The store facility list bits numbers as found in the principles
	 * of operation are numbered with bit 1UL<<31 as number 0 to
	 * bit 1UL<<0 as number 31.
	 *   Bit 0: instructions named N3, "backported" to esa-mode
	 *   Bit 2: z/Architecture mode is active
	 *   Bit 7: the store-facility-list-extended facility is installed
	 *   Bit 17: the message-security assist is installed
	 *   Bit 19: the long-displacement facility is installed
	 *   Bit 21: the extended-immediate facility is installed
	 *   Bit 22: extended-translation facility 3 is installed
	 *   Bit 30: extended-translation facility 3 enhancement facility
	 * These get translated to:
	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
	 */
	for (i = 0; i < ARRAY_SIZE(stfl_bits); i++) {
		if (test_facility(stfl_bits[i]))
			elf_hwcap |= 1UL << i;
	}

	if (test_facility(22) && test_facility(30))
		elf_hwcap |= HWCAP_S390_ETF3EH;

	/*
	 * Check for additional facilities with store-facility-list-extended.
	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
	 * as stored by stfl, bits 32-xxx contain additional facilities.
	 * How many facility words are stored depends on the number of
	 * doublewords passed to the instruction. The additional facilities
	 * are:
	 *   Bit 42: decimal floating point facility is installed
	 *   Bit 44: perform floating point operation facility is installed
	 * translated to:
	 *   HWCAP_S390_DFP bit 6 (42 && 44).
	 */
	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
		elf_hwcap |= HWCAP_S390_DFP;

	/*
	 * Huge page support HWCAP_S390_HPAGE is bit 7.
	 */
	if (MACHINE_HAS_HPAGE)
		elf_hwcap |= HWCAP_S390_HPAGE;

	/*
	 * 64-bit register support for 31-bit processes
	 * HWCAP_S390_HIGH_GPRS is bit 9.
	 */
	elf_hwcap |= HWCAP_S390_HIGH_GPRS;

	/* Derive the platform name from the machine type. */
	get_cpu_id(&cpu_id);
	switch (cpu_id.machine) {
	case 0x9672:
#if !defined(CONFIG_64BIT)
	default:	/* Use "g5" as default for 31 bit kernels. */
#endif
		strcpy(elf_platform, "g5");
		break;
	case 0x2064:
	case 0x2066:
#if defined(CONFIG_64BIT)
	default:	/* Use "z900" as default for 64 bit kernels. */
#endif
		strcpy(elf_platform, "z900");
		break;
	case 0x2084:
	case 0x2086:
		strcpy(elf_platform, "z990");
		break;
	case 0x2094:
	case 0x2096:
		strcpy(elf_platform, "z9-109");
		break;
	case 0x2097:
	case 0x2098:
		strcpy(elf_platform, "z10");
		break;
	case 0x2817:
	case 0x2818:
		strcpy(elf_platform, "z196");
		break;
	}
}

978 979 980 981
/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */
L
Linus Torvalds 已提交
982

983 984 985
void __init
setup_arch(char **cmdline_p)
{
L
Linus Torvalds 已提交
986
        /*
987
         * print what head.S has found out about the machine
L
Linus Torvalds 已提交
988
         */
989
#ifndef CONFIG_64BIT
990 991 992
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 31-bit mode\n");
993
	else if (MACHINE_IS_LPAR)
994 995 996 997 998 999 1000
		pr_info("Linux is running natively in 31-bit mode\n");
	if (MACHINE_HAS_IEEE)
		pr_info("The hardware system has IEEE compatible "
			"floating point units\n");
	else
		pr_info("The hardware system has no IEEE compatible "
			"floating point units\n");
1001
#else /* CONFIG_64BIT */
1002
	if (MACHINE_IS_VM)
1003 1004
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
1005
	else if (MACHINE_IS_KVM)
1006
		pr_info("Linux is running under KVM in 64-bit mode\n");
1007
	else if (MACHINE_IS_LPAR)
1008
		pr_info("Linux is running natively in 64-bit mode\n");
1009
#endif /* CONFIG_64BIT */
1010

1011 1012 1013
	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;
1014

1015
        ROOT_DEV = Root_RAM0;
1016 1017 1018 1019 1020 1021

	init_mm.start_code = PAGE_OFFSET;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

1022 1023 1024 1025 1026
	if (MACHINE_HAS_MVCOS)
		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
	else
		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));

1027 1028
	parse_early_param();

1029
	setup_ipl();
H
Heiko Carstens 已提交
1030
	setup_memory_end();
G
Gerald Schaefer 已提交
1031
	setup_addressing_mode();
M
Michael Holzheu 已提交
1032 1033
	reserve_oldmem();
	reserve_crashkernel();
1034 1035
	setup_memory();
	setup_resources();
M
Michael Holzheu 已提交
1036
	setup_vmcoreinfo();
1037
	setup_restart_psw();
1038 1039
	setup_lowcore();

L
Linus Torvalds 已提交
1040
        cpu_init();
1041
	s390_init_cpu_topology();
L
Linus Torvalds 已提交
1042

1043 1044 1045 1046 1047
	/*
	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
	 */
	setup_hwcaps();

L
Linus Torvalds 已提交
1048 1049 1050 1051 1052 1053 1054
	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
        paging_init();

        /* Setup default console */
	conmode_default();
1055
	set_preferred_console();
M
Michael Holzheu 已提交
1056 1057 1058

	/* Setup zfcpdump support */
	setup_zfcpdump(console_devno);
L
Linus Torvalds 已提交
1059
}