setup.c 29.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 *  S390 version
3
 *    Copyright IBM Corp. 1999, 2012
L
Linus Torvalds 已提交
4 5 6 7 8 9 10 11 12 13 14
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

15 16 17
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

L
Linus Torvalds 已提交
18
#include <linux/errno.h>
H
Heiko Carstens 已提交
19
#include <linux/export.h>
L
Linus Torvalds 已提交
20 21
#include <linux/sched.h>
#include <linux/kernel.h>
T
Tejun Heo 已提交
22
#include <linux/memblock.h>
L
Linus Torvalds 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
37
#include <linux/device.h>
38
#include <linux/notifier.h>
39
#include <linux/pfn.h>
H
Hongjie Yang 已提交
40
#include <linux/ctype.h>
41
#include <linux/reboot.h>
42
#include <linux/topology.h>
43
#include <linux/ftrace.h>
M
Michael Holzheu 已提交
44 45 46
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
47
#include <linux/compat.h>
L
Linus Torvalds 已提交
48

M
Michael Holzheu 已提交
49
#include <asm/ipl.h>
L
Linus Torvalds 已提交
50
#include <asm/uaccess.h>
51
#include <asm/facility.h>
L
Linus Torvalds 已提交
52 53 54 55 56
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
57 58
#include <asm/page.h>
#include <asm/ptrace.h>
59
#include <asm/sections.h>
H
Hongjie Yang 已提交
60
#include <asm/ebcdic.h>
61
#include <asm/kvm_virtio.h>
M
Michael Holzheu 已提交
62
#include <asm/diag.h>
63
#include <asm/os_info.h>
64
#include <asm/sclp.h>
M
Martin Schwidefsky 已提交
65
#include "entry.h"
G
Gerald Schaefer 已提交
66

67 68 69 70 71
/*
 * Default PSW mask bits used for kernel and for user context.
 * The address-space-control part of both values is rewritten by
 * set_user_mode_primary() when user space runs in primary space mode.
 */
long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
			  PSW_MASK_EA | PSW_MASK_BA;
long psw_user_bits	= PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
			  PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
			  PSW_MASK_PSTATE | PSW_ASC_HOME;
L
Linus Torvalds 已提交
72

73 74 75 76
/*
 * User copy operations.
 */
/* Active set of user copy operations; assigned in set_user_mode_primary(). */
struct uaccess_ops uaccess;
EXPORT_SYMBOL(uaccess);
78

L
Linus Torvalds 已提交
79 80 81 82
/*
 * Machine setup..
 */
/* Console type selector, changed through the SET_CONSOLE_* helpers. */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

/* Console device number; -1 until set by "condev=" or conmode_default(). */
unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

/* Console subchannel; -1 until determined by conmode_default(). */
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

91 92
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
93

94
struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
95 96 97

int __initdata memory_end_set;
unsigned long __initdata memory_end;
L
Linus Torvalds 已提交
98

99 100 101 102 103 104 105 106 107
/*
 * Kernel virtual address space layout. The final values are computed at
 * boot time by setup_memory_end(). Until then VMALLOC_END may hold the
 * page-aligned size requested with the "vmalloc=" parameter
 * (see parse_vmalloc()).
 */
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

108 109 110 111 112
#ifdef CONFIG_64BIT
/* Boundaries of the module area; set by setup_memory_end(). */
unsigned long MODULES_VADDR;
unsigned long MODULES_END;
#endif

F
Frank Munzert 已提交
113 114 115 116
/*
 * An array with a pointer to the lowcore of every CPU.
 * Entry 0 (boot CPU) is filled in by setup_lowcore().
 */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

L
Linus Torvalds 已提交
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
/*
 * This is set up by the setup-routine at boot-time
 * for S390 need to find out, what we have to setup
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

/*
 * Handler for the "condev=" kernel parameter.
 *
 * Parses the device number (base auto-detected) and, if it lies within
 * 0..65535, stores it in console_devno and resets console_irq to -1.
 * Out-of-range values are silently ignored. Always returns 1.
 */
static int __init condev_setup(char *str)
{
	int devno = simple_strtoul(str, &str, 0);

	if (devno < 0 || devno >= 65536)
		return 1;

	console_devno = devno;
	console_irq = -1;
	return 1;
}

__setup("condev=", condev_setup);

143 144
/*
 * Register the preferred console that matches the current environment:
 *   KVM with SCLP VT220    -> ttyS1
 *   KVM with SCLP linemode -> ttyS0
 *   KVM otherwise          -> hvc0
 *   3215 or SCLP console   -> ttyS0
 *   3270 console           -> tty3270 (index 0)
 * Nothing is registered if none of the cases applies.
 */
static void __init set_preferred_console(void)
{
	char *name;
	int index = 0;

	if (MACHINE_IS_KVM) {
		if (sclp_has_vt220()) {
			name = "ttyS";
			index = 1;
		} else if (sclp_has_linemode()) {
			name = "ttyS";
		} else {
			name = "hvc";
		}
	} else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) {
		name = "ttyS";
	} else if (CONSOLE_IS_3270) {
		name = "tty3270";
	} else {
		return;
	}
	add_preferred_console(name, index, NULL);
}

L
Linus Torvalds 已提交
158 159
/*
 * Handler for the "conmode=" kernel parameter.
 *
 * Accepts "hwc"/"sclp", "3215" and "3270" (each only if the matching
 * console driver is configured), selects the console type accordingly
 * and then re-evaluates the preferred console. The value must match
 * exactly. Always returns 1.
 */
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

/*
 * Determine the default console type when no "conmode=" was given.
 *
 * Under z/VM the CP commands "QUERY CONSOLE" and "QUERY TERM" are used
 * to find the console device number, its subchannel and the console
 * mode. On all other machines the SCLP console is selected (if
 * configured).
 */
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		/* NOTE(review): assumes cpcmd() leaves a terminated string
		 * in query_buffer - confirm against cpcmd(). */
		cpcmd("QUERY CONSOLE", query_buffer, sizeof(query_buffer),
		      NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		/*
		 * strstr() returns NULL if the response does not contain
		 * the subchannel; keep the current console_irq in that
		 * case instead of dereferencing NULL + 13.
		 */
		if (ptr)
			console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, sizeof(query_buffer), NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		/* The mode name follows "CONMODE" plus one separator. */
		if (strncmp(ptr + 8, "3270", 4) == 0) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

228
#ifdef CONFIG_ZFCPDUMP
/*
 * Prepare the kernel for running as zfcpdump dumper: when IPLed as FCP
 * dump and OLDMEM_BASE is not set, restrict the visible devices via
 * "cio_ignore=" on the command line and lower the console log level.
 */
static void __init setup_zfcpdump(void)
{
	if (ipl_info.type != IPL_TYPE_FCP_DUMP || OLDMEM_BASE)
		return;

	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_ZFCPDUMP */

L
Linus Torvalds 已提交
242 243 244 245 246 247 248
/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
249
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
250 251 252 253 254
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
255 256 257 258 259
	_machine_restart(command);
}

void machine_halt(void)
{
260 261 262 263 264 265
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
266 267 268 269 270
	_machine_halt();
}

void machine_power_off(void)
{
271 272 273 274 275 276
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
277 278 279
	_machine_power_off();
}

280 281 282 283
/*
 * Dummy power off function.
 */
/* Defaults to machine_power_off(). */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
285

286 287 288
/*
 * Handler for the "mem=" early parameter.
 *
 * Stores the parsed size in memory_end and records in memory_end_set
 * that the limit was given explicitly.
 *
 * Returns 0 on success, -EINVAL if the parameter was given without a
 * value (@p is NULL in that case and must not be passed to memparse()).
 */
static int __init early_parse_mem(char *p)
{
	if (!p)
		return -EINVAL;
	memory_end = memparse(p, &p);
	memory_end_set = 1;
	return 0;
}
early_param("mem", early_parse_mem);

294 295 296 297 298 299 300 301 302
/*
 * Handler for the "vmalloc=" early parameter.
 *
 * The requested size is rounded up to a full page and stored in
 * VMALLOC_END, where setup_memory_end() later reads it as the size
 * of the vmalloc area.
 *
 * Returns 0 on success, -EINVAL if no value was given.
 */
static int __init parse_vmalloc(char *arg)
{
	unsigned long long size;

	if (arg == NULL)
		return -EINVAL;

	size = memparse(arg, &arg);
	VMALLOC_END = (size + PAGE_SIZE - 1) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

303 304
/*
 * Address space mode used for user space. Defaults to primary space
 * mode and can be changed with the "user_mode=" early parameter.
 */
unsigned int s390_user_mode = PRIMARY_SPACE_MODE;
EXPORT_SYMBOL_GPL(s390_user_mode);
G
Gerald Schaefer 已提交
305

306
/*
 * Switch the address spaces: the kernel PSW bits select the home space,
 * the user PSW bits (and the 31-bit compat user PSW bits) select the
 * primary space. The user copy operations are picked to match, based
 * on whether MVCOS is available.
 */
static void __init set_user_mode_primary(void)
{
	psw_kernel_bits &= ~PSW_MASK_ASC;
	psw_kernel_bits |= PSW_ASC_HOME;

	psw_user_bits &= ~PSW_MASK_ASC;
	psw_user_bits |= PSW_ASC_PRIMARY;
#ifdef CONFIG_COMPAT
	psw32_user_bits &= ~PSW32_MASK_ASC;
	psw32_user_bits |= PSW32_ASC_PRIMARY;
#endif
	if (MACHINE_HAS_MVCOS)
		uaccess = uaccess_mvcos_switch;
	else
		uaccess = uaccess_pt;
}

317
static int __init early_parse_user_mode(char *p)
G
Gerald Schaefer 已提交
318
{
319
	if (p && strcmp(p, "primary") == 0)
320
		s390_user_mode = PRIMARY_SPACE_MODE;
321
	else if (!p || strcmp(p, "home") == 0)
322
		s390_user_mode = HOME_SPACE_MODE;
323 324
	else
		return 1;
325
	return 0;
G
Gerald Schaefer 已提交
326
}
327
early_param("user_mode", early_parse_user_mode);
G
Gerald Schaefer 已提交
328

329
/*
 * Apply the selected user space mode. Only primary space mode needs
 * any work: the address spaces are switched and the availability of
 * MVCOS is reported.
 */
static void __init setup_addressing_mode(void)
{
	if (s390_user_mode == PRIMARY_SPACE_MODE) {
		set_user_mode_primary();
		if (!MACHINE_HAS_MVCOS)
			pr_info("Address spaces switched, mvcos not available\n");
		else
			pr_info("Address spaces switched, mvcos available\n");
	}
}

M
Martin Schwidefsky 已提交
340 341 342
/* Top of the stack used for PSW restart; allocated in setup_lowcore(). */
void *restart_stack __attribute__((__section__(".data")));

/*
 * Allocate and initialize the lowcore for the boot CPU, copy over the
 * values that already live in the current lowcore, publish the restart
 * information in the absolute zero lowcore and finally switch the
 * prefix to the new lowcore.
 */
static void __init setup_lowcore(void)
{
	/* Distance of the initial stack pointer from the top of a stack */
	const unsigned long frame =
		STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
	struct _lowcore *lc;
	void *stack;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);

	/* New PSWs for restart, interrupts, system calls and checks */
	lc->restart_psw.mask = psw_kernel_bits;
	lc->restart_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
	lc->external_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->external_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT |
		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) system_call;
	lc->program_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->program_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = psw_kernel_bits;
	lc->mcck_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->io_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) io_int_handler;
	lc->clock_comparator = -1ULL;

	/* Kernel, async and panic stacks */
	lc->kernel_stack =
		(unsigned long) &init_thread_union + THREAD_SIZE - frame;
	stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
	lc->async_stack = (unsigned long) stack + ASYNC_SIZE - frame;
	stack = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
	lc->panic_stack = (unsigned long) stack + PAGE_SIZE - frame;

	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
	lc->thread_info = (unsigned long) &init_thread_union;

	/* Carry over what is already known in the current lowcore */
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       MAX_FACILITY_BIT/8);
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE) {
		lc->extended_save_area_addr = (__u32)
			__alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
		/* enable extended save area */
		__ctl_set_bit(14, 29);
	}
#else
	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;
	lc->ftrace_func = S390_lowcore.ftrace_func;

	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
	restart_stack += ASYNC_SIZE;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1UL;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	/* Activate the new lowcore and remember it for CPU 0 */
	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

430 431 432 433 434 435 436 437 438 439
static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

440 441 442 443 444
static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

445 446 447
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
448
	&bss_resource,
449 450 451
};

/*
 * Register one iomem resource per non-empty memory chunk and nest the
 * kernel code/data/bss resources below the chunk they start in. A
 * kernel resource that extends beyond the end of its chunk is split:
 * the part inside the chunk is registered as a copy, and the remainder
 * is left for a following chunk.
 */
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	struct mem_chunk *chunk;
	int i, j;

	code_resource.start = (unsigned long) &_text;
	code_resource.end = (unsigned long) &_etext - 1;
	data_resource.start = (unsigned long) &_etext;
	data_resource.end = (unsigned long) &_edata - 1;
	bss_resource.start = (unsigned long) &__bss_start;
	bss_resource.end = (unsigned long) &__bss_stop - 1;

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk = &memory_chunk[i];
		if (!chunk->size)
			continue;

		res = alloc_bootmem_low(sizeof(*res));
		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
		if (chunk->type == CHUNK_READ_WRITE) {
			res->name = "System RAM";
		} else if (chunk->type == CHUNK_READ_ONLY) {
			res->name = "System ROM";
			res->flags |= IORESOURCE_READONLY;
		} else {
			res->name = "reserved";
		}
		res->start = chunk->addr;
		res->end = res->start + chunk->size - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			/* Only resources starting within this chunk */
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end <= res->end) {
				request_resource(res, std_res);
				continue;
			}
			/* Crosses the chunk end: register the first part */
			sub_res = alloc_bootmem_low(sizeof(*sub_res));
			*sub_res = *std_res;
			sub_res->end = res->end;
			std_res->start = res->end + 1;
			request_resource(res, sub_res);
		}
	}
}

H
Heiko Carstens 已提交
501 502
/*
 * Determine the kernel address space layout and the end of usable
 * memory:
 *  - align all memory chunks to MAX_ORDER blocks,
 *  - choose the page table depth and place the module/vmalloc areas at
 *    the end of the address space,
 *  - place the vmemmap array below the vmalloc area,
 *  - clamp memory_end and trim the memory chunks to 0..memory_end.
 */
static void __init setup_memory_end(void)
{
	const unsigned long align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
	unsigned long vmax, vmalloc_size, tmp;
	unsigned long real_memory_size = 0;
	struct mem_chunk *chunk;
	int i;

#ifdef CONFIG_ZFCPDUMP
	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
		memory_end = ZFCPDUMP_HSA_SIZE;
		memory_end_set = 1;
	}
#endif
	memory_end &= PAGE_MASK;

	/*
	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
	 * extra checks that HOLES_IN_ZONE would require.
	 */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		unsigned long start, end;

		chunk = &memory_chunk[i];
		if (!chunk->size)
			continue;
		start = (chunk->addr + align - 1) & ~(align - 1);
		end = (chunk->addr + chunk->size) & ~(align - 1);
		if (start < end) {
			chunk->addr = start;
			chunk->size = end - start;
		} else {
			/* Nothing left after alignment: drop the chunk */
			memset(chunk, 0, sizeof(*chunk));
		}
		real_memory_size = max(real_memory_size,
				       chunk->addr + chunk->size);
	}

	/* Choose kernel address space layout: 2, 3, or 4 levels. */
#ifdef CONFIG_64BIT
	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
	tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
	tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
	if (tmp <= (1UL << 42))
		vmax = 1UL << 42;	/* 3-level kernel page table */
	else
		vmax = 1UL << 53;	/* 4-level kernel page table */
	/* module area is at the end of the kernel address space. */
	MODULES_END = vmax;
	MODULES_VADDR = MODULES_END - MODULES_LEN;
	VMALLOC_END = MODULES_VADDR;
#else
	vmalloc_size = VMALLOC_END ?: 96UL << 20;
	vmax = 1UL << 31;		/* 2-level kernel page table */
	/* vmalloc area is at the end of the kernel address space. */
	VMALLOC_END = vmax;
#endif
	VMALLOC_START = vmax - vmalloc_size;

	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
	tmp = SECTION_ALIGN_UP(tmp);
	tmp = VMALLOC_START - tmp * sizeof(struct page);
	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
	vmemmap = (struct page *) tmp;

	/* Take care that memory_end is set and <= vmemmap */
	memory_end = min(memory_end ?: real_memory_size, tmp);

	/* Fixup memory chunk array to fit into 0..memory_end */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk = &memory_chunk[i];
		if (!chunk->size)
			continue;
		if (chunk->addr >= memory_end)
			memset(chunk, 0, sizeof(*chunk));
		else if (chunk->addr + chunk->size > memory_end)
			chunk->size = memory_end - chunk->addr;
	}
}

M
Michael Holzheu 已提交
589 590
/*
 * Store the address of the vmcoreinfo note in the absolute zero
 * lowcore.
 */
static void __init setup_vmcoreinfo(void)
{
	unsigned long note = paddr_vmcoreinfo_note();

	mem_assign_absolute(S390_lowcore.vmcore_info, note);
}

M
Michael Holzheu 已提交
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609
#ifdef CONFIG_CRASH_DUMP

/*
 * Find suitable location for crashkernel memory
 *
 * If OLDMEM_BASE is set and the requested size equals OLDMEM_SIZE, the
 * old location is reused. Otherwise the read-write memory chunks are
 * searched from the highest to the lowest one and the crashkernel area
 * is placed at the end of the first chunk that fits.
 *
 * Returns the base address, or 0 with *msg set to the failure reason.
 */
static unsigned long __init find_crash_base(unsigned long crash_size,
					    char **msg)
{
	unsigned long crash_base;
	struct mem_chunk *chunk;
	int i;

	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return 0;
	}
	if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
		return OLDMEM_BASE;

	for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
		chunk = &memory_chunk[i];
		if (chunk->size == 0 ||
		    chunk->type != CHUNK_READ_WRITE ||
		    chunk->size < crash_size)
			continue;
		crash_base = (chunk->addr + chunk->size) - crash_size;
		/*
		 * The area must lie above the first crash_size bytes,
		 * above ZFCPDUMP_HSA_SIZE_MAX and above the initrd.
		 */
		if (crash_base < crash_size ||
		    crash_base < ZFCPDUMP_HSA_SIZE_MAX ||
		    crash_base < (unsigned long) INITRD_START + INITRD_SIZE)
			continue;
		return crash_base;
	}
	*msg = "no suitable area found";
	return 0;
}

/*
 * Check if crash_base and crash_size is valid
 *
 * Returns 0 if the area is usable, otherwise -EINVAL with *msg set to
 * the reason.
 */
static int __init verify_crash_base(unsigned long crash_base,
				    unsigned long crash_size,
				    char **msg)
{
	struct mem_chunk *chunk;
	unsigned long chunk_end;
	int i;

	/*
	 * Because we do the swap to zero, we must have at least 'crash_size'
	 * bytes free space before crash_base
	 */
	if (crash_size > crash_base) {
		*msg = "crashkernel offset must be greater than size";
		return -EINVAL;
	}

	/* First memory chunk must be at least crash_size */
	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return -EINVAL;
	}

	/* Check if we fit into the respective memory chunk */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk = &memory_chunk[i];
		if (chunk->size == 0)
			continue;
		chunk_end = chunk->addr + chunk->size;
		if (crash_base < chunk->addr || crash_base >= chunk_end)
			continue;
		/* we have found the memory chunk */
		if (crash_base + crash_size <= chunk_end)
			return 0;
		*msg = "selected memory chunk is too small for "
			"crashkernel memory";
		return -EINVAL;
	}
	*msg = "invalid memory range specified";
	return -EINVAL;
}

/*
 * When kdump is enabled, we have to ensure that no memory from
 * the area [0 - crashkernel memory size] and
 * [crashk_res.start - crashk_res.end] is set offline.
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
		return NOTIFY_OK;
	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
		return NOTIFY_OK;
	return NOTIFY_BAD;
}

/* Memory notifier block; registered by reserve_crashkernel(). */
static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that oldmem, where the dump is stored, is protected
 *
 * If OLDMEM_BASE is set, two holes are punched into the memory chunk
 * array: the oldmem area itself and everything from OLDMEM_SIZE up to
 * the highest chunk end.
 */
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long real_size = 0;
	unsigned long chunk_end;
	int i;

	if (!OLDMEM_BASE)
		return;

	/* Find the highest end address of all memory chunks */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk_end = memory_chunk[i].addr + memory_chunk[i].size;
		if (chunk_end > real_size)
			real_size = chunk_end;
	}
	create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
	create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 *
 * Parses "crashkernel=" from the command line, determines (or verifies)
 * the base address, registers the memory notifier and the crashk_res
 * resource and removes the area from the memory chunk array. On
 * failure the reason is logged and the notifier is unregistered again.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	char *msg = NULL;
	int rc;

	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
			       &crash_base);
	if (rc || crash_size == 0)
		return;
	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (register_memory_notifier(&kdump_mem_nb))
		return;

	/* No base given: look for a suitable one */
	if (!crash_base)
		crash_base = find_crash_base(crash_size, &msg);
	if (!crash_base || verify_crash_base(crash_base, crash_size, &msg))
		goto out_fail;

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
	create_mem_hole(memory_chunk, crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20, memory_end >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
	return;

out_fail:
	pr_info("crashkernel reservation failed: %s\n", msg);
	unregister_memory_notifier(&kdump_mem_nb);
#endif
}

M
Martin Schwidefsky 已提交
768
/*
 * Set up the boot-time memory allocator:
 *  - move the initrd out of the way if the bootmem bitmap would
 *    overwrite it,
 *  - initialize bootmem and register all memory chunks,
 *  - reserve lowcore/command line/kernel image, the bootmem bitmap,
 *    the crashkernel area, the ELF core header (kdump kernel only) and
 *    the initrd.
 */
static void __init setup_memory(void)
{
	unsigned long bootmap_size;
	unsigned long start_pfn, end_pfn;
	int i;

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	start_pfn = PFN_UP(__pa(&_end));
	end_pfn = max_pfn = PFN_DOWN(memory_end);

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Move the initrd in case the bitmap of the bootmem allocator
	 * would overwrite it.
	 */

	if (INITRD_START && INITRD_SIZE) {
		unsigned long bmap_size;
		unsigned long start;

		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
		bmap_size = PFN_PHYS(bmap_size);

		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;

#ifdef CONFIG_CRASH_DUMP
			if (OLDMEM_BASE) {
				/* Move initrd behind kdump oldmem */
				if (start + INITRD_SIZE > OLDMEM_BASE &&
				    start < OLDMEM_BASE + OLDMEM_SIZE)
					start = OLDMEM_BASE + OLDMEM_SIZE;
			}
#endif
			if (start + INITRD_SIZE > memory_end) {
				pr_err("initrd extends beyond end of "
				       "memory (0x%08lx > 0x%08lx) "
				       "disabling initrd\n",
				       start + INITRD_SIZE, memory_end);
				INITRD_START = INITRD_SIZE = 0;
			} else {
				pr_info("Moving initrd (0x%08lx -> "
					"0x%08lx, size: %ld)\n",
					INITRD_START, start, INITRD_SIZE);
				memmove((void *) start, (void *) INITRD_START,
					INITRD_SIZE);
				INITRD_START = start;
			}
		}
	}
#endif

	/*
	 * Initialize the boot-time allocator
	 */
	bootmap_size = init_bootmem(start_pfn, end_pfn);

	/*
	 * Register RAM areas with the bootmem allocator.
	 */

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		unsigned long start_chunk, end_chunk, pfn;

		if (!memory_chunk[i].size)
			continue;
		start_chunk = PFN_DOWN(memory_chunk[i].addr);
		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
		end_chunk = min(end_chunk, end_pfn);
		if (start_chunk >= end_chunk)
			continue;
		memblock_add_node(PFN_PHYS(start_chunk),
				  PFN_PHYS(end_chunk - start_chunk), 0);
		/* Storage keys are only initialized from start_pfn on */
		pfn = max(start_chunk, start_pfn);
		storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
	}

	psw_set_key(PAGE_DEFAULT_KEY);

	free_bootmem_with_active_regions(0, max_pfn);

	/*
	 * Reserve memory used for lowcore/command line/kernel image.
	 */
	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
	reserve_bootmem((unsigned long)_stext,
			PFN_PHYS(start_pfn) - (unsigned long)_stext,
			BOOTMEM_DEFAULT);
	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
			BOOTMEM_DEFAULT);

#ifdef CONFIG_CRASH_DUMP
	if (crashk_res.start)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);
	if (is_kdump_kernel())
		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
	if (INITRD_START && INITRD_SIZE) {
		if (INITRD_START + INITRD_SIZE <= memory_end) {
			reserve_bootmem(INITRD_START, INITRD_SIZE,
					BOOTMEM_DEFAULT);
			initrd_start = INITRD_START;
			initrd_end = initrd_start + INITRD_SIZE;
		} else {
			/*
			 * initrd_start has not been assigned on this path,
			 * so report the real end address of the initrd
			 * based on INITRD_START.
			 */
			pr_err("initrd extends beyond end of "
			       "memory (0x%08lx > 0x%08lx) "
			       "disabling initrd\n",
			       INITRD_START + INITRD_SIZE, memory_end);
			initrd_start = initrd_end = 0;
		}
	}
#endif
}
L
Linus Torvalds 已提交
894

895 896 897 898 899 900
/*
 * Setup hardware capabilities.
 *
 * Fills elf_hwcap from the installed facilities and machine flags and
 * sets elf_platform according to the machine type of the CPU.
 */
static void __init setup_hwcaps(void)
{
	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
	struct cpuid cpu_id;
	int bit;

	/*
	 * The store facility list bits numbers as found in the principles
	 * of operation are numbered with bit 1UL<<31 as number 0 to
	 * bit 1UL<<0 as number 31.
	 *   Bit 0: instructions named N3, "backported" to esa-mode
	 *   Bit 2: z/Architecture mode is active
	 *   Bit 7: the store-facility-list-extended facility is installed
	 *   Bit 17: the message-security assist is installed
	 *   Bit 19: the long-displacement facility is installed
	 *   Bit 21: the extended-immediate facility is installed
	 *   Bit 22: extended-translation facility 3 is installed
	 *   Bit 30: extended-translation facility 3 enhancement facility
	 * These get translated to:
	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
	 */
	for (bit = 0; bit < ARRAY_SIZE(stfl_bits); bit++) {
		if (test_facility(stfl_bits[bit]))
			elf_hwcap |= 1UL << bit;
	}

	if (test_facility(22) && test_facility(30))
		elf_hwcap |= HWCAP_S390_ETF3EH;

	/*
	 * Check for additional facilities with store-facility-list-extended.
	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
	 * as stored by stfl, bits 32-xxx contain additional facilities.
	 * How many facility words are stored depends on the number of
	 * doublewords passed to the instruction. The additional facilities
	 * are:
	 *   Bit 42: decimal floating point facility is installed
	 *   Bit 44: perform floating point operation facility is installed
	 * translated to:
	 *   HWCAP_S390_DFP bit 6 (42 && 44).
	 */
	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
		elf_hwcap |= HWCAP_S390_DFP;

	/*
	 * Huge page support HWCAP_S390_HPAGE is bit 7.
	 */
	if (MACHINE_HAS_HPAGE)
		elf_hwcap |= HWCAP_S390_HPAGE;

#if defined(CONFIG_64BIT)
	/*
	 * 64-bit register support for 31-bit processes
	 * HWCAP_S390_HIGH_GPRS is bit 9.
	 */
	elf_hwcap |= HWCAP_S390_HIGH_GPRS;

	/*
	 * Transactional execution support HWCAP_S390_TE is bit 10.
	 */
	if (test_facility(50) && test_facility(73))
		elf_hwcap |= HWCAP_S390_TE;
#endif

	/* Derive the ELF platform name from the machine type */
	get_cpu_id(&cpu_id);
	switch (cpu_id.machine) {
	case 0x9672:
#if !defined(CONFIG_64BIT)
	default:	/* Use "g5" as default for 31 bit kernels. */
#endif
		strcpy(elf_platform, "g5");
		break;
	case 0x2064:
	case 0x2066:
#if defined(CONFIG_64BIT)
	default:	/* Use "z900" as default for 64 bit kernels. */
#endif
		strcpy(elf_platform, "z900");
		break;
	case 0x2084:
	case 0x2086:
		strcpy(elf_platform, "z990");
		break;
	case 0x2094:
	case 0x2096:
		strcpy(elf_platform, "z9-109");
		break;
	case 0x2097:
	case 0x2098:
		strcpy(elf_platform, "z10");
		break;
	case 0x2817:
	case 0x2818:
		strcpy(elf_platform, "z196");
		break;
	case 0x2827:
	case 0x2828:
		strcpy(elf_platform, "zEC12");
		break;
	}
}

1003 1004 1005 1006
/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */
L
Linus Torvalds 已提交
1007

M
Martin Schwidefsky 已提交
1008
void __init setup_arch(char **cmdline_p)
1009
{
L
Linus Torvalds 已提交
1010
        /*
1011
         * print what head.S has found out about the machine
L
Linus Torvalds 已提交
1012
         */
1013
#ifndef CONFIG_64BIT
1014 1015 1016
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 31-bit mode\n");
1017
	else if (MACHINE_IS_LPAR)
1018 1019 1020 1021 1022 1023 1024
		pr_info("Linux is running natively in 31-bit mode\n");
	if (MACHINE_HAS_IEEE)
		pr_info("The hardware system has IEEE compatible "
			"floating point units\n");
	else
		pr_info("The hardware system has no IEEE compatible "
			"floating point units\n");
1025
#else /* CONFIG_64BIT */
1026
	if (MACHINE_IS_VM)
1027 1028
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
1029
	else if (MACHINE_IS_KVM)
1030
		pr_info("Linux is running under KVM in 64-bit mode\n");
1031
	else if (MACHINE_IS_LPAR)
1032
		pr_info("Linux is running natively in 64-bit mode\n");
1033
#endif /* CONFIG_64BIT */
1034

1035 1036 1037
	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;
1038

1039
        ROOT_DEV = Root_RAM0;
1040 1041 1042 1043 1044 1045

	init_mm.start_code = PAGE_OFFSET;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

1046 1047 1048 1049 1050
	if (MACHINE_HAS_MVCOS)
		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
	else
		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));

1051
	parse_early_param();
1052
	detect_memory_layout(memory_chunk, memory_end);
1053
	os_info_init();
1054
	setup_ipl();
1055
	reserve_oldmem();
H
Heiko Carstens 已提交
1056
	setup_memory_end();
G
Gerald Schaefer 已提交
1057
	setup_addressing_mode();
M
Michael Holzheu 已提交
1058
	reserve_crashkernel();
1059 1060
	setup_memory();
	setup_resources();
M
Michael Holzheu 已提交
1061
	setup_vmcoreinfo();
1062 1063
	setup_lowcore();

L
Linus Torvalds 已提交
1064
        cpu_init();
1065
	s390_init_cpu_topology();
L
Linus Torvalds 已提交
1066

1067 1068 1069 1070 1071
	/*
	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
	 */
	setup_hwcaps();

L
Linus Torvalds 已提交
1072 1073 1074 1075 1076 1077 1078
	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
        paging_init();

        /* Setup default console */
	conmode_default();
1079
	set_preferred_console();
M
Michael Holzheu 已提交
1080 1081

	/* Setup zfcpdump support */
1082
	setup_zfcpdump();
L
Linus Torvalds 已提交
1083
}