setup.c 29.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2
/*
 *  S390 version
3
 *    Copyright IBM Corp. 1999, 2012
L
Linus Torvalds 已提交
4 5 6 7 8 9 10 11 12 13 14
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

15 16 17
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

L
Linus Torvalds 已提交
18 19 20 21
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
T
Tejun Heo 已提交
22
#include <linux/memblock.h>
L
Linus Torvalds 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
37
#include <linux/device.h>
38
#include <linux/notifier.h>
39
#include <linux/pfn.h>
H
Hongjie Yang 已提交
40
#include <linux/ctype.h>
41
#include <linux/reboot.h>
42
#include <linux/topology.h>
43
#include <linux/ftrace.h>
M
Michael Holzheu 已提交
44 45 46
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
47
#include <linux/compat.h>
L
Linus Torvalds 已提交
48

M
Michael Holzheu 已提交
49
#include <asm/ipl.h>
L
Linus Torvalds 已提交
50
#include <asm/uaccess.h>
51
#include <asm/facility.h>
L
Linus Torvalds 已提交
52 53 54 55 56
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
57 58
#include <asm/page.h>
#include <asm/ptrace.h>
59
#include <asm/sections.h>
H
Hongjie Yang 已提交
60
#include <asm/ebcdic.h>
61
#include <asm/kvm_virtio.h>
M
Michael Holzheu 已提交
62
#include <asm/diag.h>
63
#include <asm/os_info.h>
64
#include <asm/sclp.h>
M
Martin Schwidefsky 已提交
65
#include "entry.h"
G
Gerald Schaefer 已提交
66

67 68 69 70 71
/*
 * Default PSW mask bits used for kernel and user mode PSWs. The
 * address-space-control bits of both words are rewritten by
 * set_user_mode_primary() when primary space mode is selected.
 */
long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
			  PSW_MASK_EA | PSW_MASK_BA;
long psw_user_bits	= PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
			  PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
			  PSW_MASK_PSTATE | PSW_ASC_HOME;
L
Linus Torvalds 已提交
72

73 74 75 76
/*
 * User copy operations.
 */
struct uaccess_ops uaccess;
77
EXPORT_SYMBOL(uaccess);
78

L
Linus Torvalds 已提交
79 80 81 82
/*
 * Machine setup..
 */
unsigned int console_mode = 0;
83 84
EXPORT_SYMBOL(console_mode);

L
Linus Torvalds 已提交
85
unsigned int console_devno = -1;
86 87
EXPORT_SYMBOL(console_devno);

L
Linus Torvalds 已提交
88
unsigned int console_irq = -1;
89 90
EXPORT_SYMBOL(console_irq);

91 92
unsigned long elf_hwcap = 0;
char elf_platform[ELF_PLATFORM_SIZE];
93

94
struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
95 96 97

int __initdata memory_end_set;
unsigned long __initdata memory_end;
L
Linus Torvalds 已提交
98

99 100 101 102 103 104 105 106 107
/*
 * Kernel virtual address space layout. The final values are computed in
 * setup_memory_end(). Before that, VMALLOC_END may hold the page-aligned
 * size requested with the "vmalloc" early parameter (see parse_vmalloc()).
 */
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

/* Start of the struct page array; placed below VMALLOC_START. */
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

F
Frank Munzert 已提交
108 109 110 111
/* An array with a pointer to the lowcore of every CPU. */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

L
Linus Torvalds 已提交
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

/*
 * Handle the "condev=" kernel parameter.
 *
 * @str: parameter value; parsed as a number with automatic base detection.
 *
 * A value in the range 0..65535 becomes the console device number and
 * resets console_irq to -1. Any other value is silently ignored.
 *
 * The parsed value is kept as unsigned long so that the range check sees
 * the full number; storing it in an int first would truncate large values
 * and could let them pass the check.
 *
 * Returns 1 in all cases.
 */
static int __init condev_setup(char *str)
{
	unsigned long vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

138 139
/*
 * Register the preferred console for the current environment.
 *
 * Under KVM: "ttyS" index 1 if sclp_has_vt220(), else "ttyS" index 0 if
 * sclp_has_linemode(), else "hvc" index 0.
 * Otherwise: "ttyS" index 0 for the 3215 and SCLP console modes and
 * "tty3270" index 0 for the 3270 console mode. Nothing is registered
 * if none of these modes is active.
 */
static void __init set_preferred_console(void)
{
	if (!MACHINE_IS_KVM) {
		if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
			add_preferred_console("ttyS", 0, NULL);
		else if (CONSOLE_IS_3270)
			add_preferred_console("tty3270", 0, NULL);
		return;
	}

	if (sclp_has_vt220()) {
		add_preferred_console("ttyS", 1, NULL);
		return;
	}
	if (sclp_has_linemode()) {
		add_preferred_console("ttyS", 0, NULL);
		return;
	}
	add_preferred_console("hvc", 0, NULL);
}

L
Linus Torvalds 已提交
153 154
/*
 * Handle the "conmode=" kernel parameter.
 *
 * @str: parameter value. "hwc" and "sclp" select the SCLP console,
 *	 "3215" and "3270" select the respective console. Each value is
 *	 only evaluated when the matching console option is configured;
 *	 the whole string has to match.
 *
 * The preferred console is (re)registered afterwards regardless of
 * whether the value was recognized.
 *
 * Returns 1 in all cases.
 */
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

/*
 * Pick the default console mode when none was given on the command line.
 *
 * When running under VM (MACHINE_IS_VM) the console device number and
 * subchannel are taken from the response to the CP command
 * "QUERY CONSOLE", and the console mode from the "CONMODE" field of
 * "QUERY TERM". The preferred driver for the reported mode is used if
 * it is configured, otherwise the next configured one is chosen.
 * Everywhere else the SCLP console is selected if it is configured.
 */
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, sizeof(query_buffer),
		      NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		/*
		 * Leave console_irq untouched if the response does not
		 * contain the subchannel field instead of dereferencing
		 * a NULL based pointer.
		 */
		if (ptr)
			console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, sizeof(query_buffer), NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (strncmp(ptr + 8, "3270", 4) == 0) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

223
#ifdef CONFIG_ZFCPDUMP
/*
 * Adjust the kernel command line for a zfcpdump (FCP dump) IPL.
 *
 * @console_devno: device number of the console, or -1 if there is none.
 *		   Note that this parameter shadows the global variable
 *		   of the same name.
 *
 * Does nothing unless the IPL type is IPL_TYPE_FCP_DUMP and OLDMEM_BASE
 * is zero. Otherwise a "cio_ignore=" option that ignores all devices
 * except the IPL device (and the console, if given) is appended to
 * boot_command_line and console_loglevel is set to 2.
 *
 * The option is built and appended with bounded string functions so
 * that an almost full command line cannot be overrun; in that case the
 * option is truncated.
 */
static void __init setup_zfcpdump(unsigned int console_devno)
{
	static char str[41];

	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
		return;
	if (OLDMEM_BASE)
		return;
	if (console_devno != -1)
		snprintf(str, sizeof(str),
			 " cio_ignore=all,!0.0.%04x,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno, console_devno);
	else
		snprintf(str, sizeof(str), " cio_ignore=all,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno);
	strlcat(boot_command_line, str, COMMAND_LINE_SIZE);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(unsigned int console_devno) {}
#endif /* CONFIG_ZFCPDUMP */

L
Linus Torvalds 已提交
245 246 247 248 249 250 251
 /*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off. 
 */

void machine_restart(char *command)
{
252
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
253 254 255 256 257
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
258 259 260 261 262
	_machine_restart(command);
}

void machine_halt(void)
{
263 264 265 266 267 268
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
269 270 271 272 273
	_machine_halt();
}

void machine_power_off(void)
{
274 275 276 277 278 279
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
280 281 282
	_machine_power_off();
}

283 284 285 286 287
/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;

288 289 290
/*
 * Handle the "mem=" early parameter.
 *
 * @p: parameter value, parsed with memparse(). NULL when the parameter
 *     was given without a value.
 *
 * Stores the parsed size in memory_end and records in memory_end_set
 * that the limit was requested explicitly.
 *
 * Returns 0 on success, -EINVAL if no value was supplied (same
 * convention as parse_vmalloc()).
 */
static int __init early_parse_mem(char *p)
{
	if (!p)
		return -EINVAL;
	memory_end = memparse(p, &p);
	memory_end_set = 1;
	return 0;
}
early_param("mem", early_parse_mem);

296 297 298 299 300 301 302 303 304
/*
 * Handle the "vmalloc=" early parameter.
 *
 * @arg: requested size of the vmalloc area, parsed with memparse().
 *
 * The size is rounded up to a page boundary and parked in VMALLOC_END
 * until setup_memory_end() computes the final layout.
 *
 * Returns 0 on success, -EINVAL if no value was supplied.
 */
static int __init parse_vmalloc(char *arg)
{
	unsigned long long size;

	if (arg == NULL)
		return -EINVAL;

	size = memparse(arg, &arg);
	VMALLOC_END = (size + PAGE_SIZE - 1) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

305 306
unsigned int s390_user_mode = PRIMARY_SPACE_MODE;
EXPORT_SYMBOL_GPL(s390_user_mode);
G
Gerald Schaefer 已提交
307

308
/*
 * Switch the address space assignment to primary space mode: the kernel
 * PSW bits get PSW_ASC_HOME, the user PSW bits (and the compat user PSW
 * bits, if CONFIG_COMPAT is set) get the primary ASC. The uaccess
 * operations are set to the mvcos switch variant when MACHINE_HAS_MVCOS
 * is true and to the page table variant otherwise.
 */
static void __init set_user_mode_primary(void)
{
	psw_kernel_bits &= ~PSW_MASK_ASC;
	psw_kernel_bits |= PSW_ASC_HOME;

	psw_user_bits &= ~PSW_MASK_ASC;
	psw_user_bits |= PSW_ASC_PRIMARY;

#ifdef CONFIG_COMPAT
	psw32_user_bits &= ~PSW32_MASK_ASC;
	psw32_user_bits |= PSW32_ASC_PRIMARY;
#endif

	if (MACHINE_HAS_MVCOS)
		uaccess = uaccess_mvcos_switch;
	else
		uaccess = uaccess_pt;
}

319
/*
 * Handle the "user_mode=" early parameter.
 *
 * @p: "primary" selects PRIMARY_SPACE_MODE; "home" or a missing value
 *     selects HOME_SPACE_MODE.
 *
 * Returns 0 if the value was accepted, 1 for any other value (in which
 * case s390_user_mode is left unchanged).
 */
static int __init early_parse_user_mode(char *p)
{
	if (!p || strcmp(p, "home") == 0) {
		s390_user_mode = HOME_SPACE_MODE;
		return 0;
	}
	if (strcmp(p, "primary") == 0) {
		s390_user_mode = PRIMARY_SPACE_MODE;
		return 0;
	}
	return 1;
}
329
early_param("user_mode", early_parse_user_mode);
G
Gerald Schaefer 已提交
330

331
/*
 * Apply the selected user address space mode. Only primary space mode
 * needs work: the PSW bits and uaccess operations are switched and the
 * availability of mvcos is logged.
 */
static void __init setup_addressing_mode(void)
{
	if (s390_user_mode == PRIMARY_SPACE_MODE) {
		set_user_mode_primary();
		if (!MACHINE_HAS_MVCOS)
			pr_info("Address spaces switched, mvcos not available\n");
		else
			pr_info("Address spaces switched, mvcos available\n");
	}
}

M
Martin Schwidefsky 已提交
342 343 344
void *restart_stack __attribute__((__section__(".data")));

/*
 * Allocate and initialize the lowcore of the boot CPU.
 *
 * A new lowcore is allocated from low bootmem, the new PSWs for restart,
 * external, svc, program check, machine check and I/O interruptions are
 * filled in, the kernel/async/panic/restart stacks are set up, and the
 * values already collected in the current S390_lowcore (machine flags,
 * facility lists, timers, ftrace function) are carried over. Finally
 * the prefix register is set to the new lowcore and the lowcore is
 * recorded in lowcore_ptr[0].
 */
static void __init setup_lowcore(void)
{
	unsigned long dat_mcck_mask;
	struct _lowcore *lc;

	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);

	/* PSW mask shared by the external, program and I/O new PSWs. */
	dat_mcck_mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;

	/* New PSWs: mask first, then the handler address. */
	lc->restart_psw.mask = psw_kernel_bits;
	lc->restart_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = dat_mcck_mask;
	lc->external_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = dat_mcck_mask | PSW_MASK_IO | PSW_MASK_EXT;
	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
	lc->program_new_psw.mask = dat_mcck_mask;
	lc->program_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = psw_kernel_bits;
	lc->mcck_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = dat_mcck_mask;
	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;

	lc->clock_comparator = -1ULL;

	/* Stack pointers are set to the end of the respective area. */
	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
	lc->async_stack = (unsigned long)
		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
	lc->panic_stack = (unsigned long)
		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
	lc->thread_info = (unsigned long) &init_thread_union;

	/* Carry over what has been detected in the current lowcore. */
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       MAX_FACILITY_BIT/8);
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE) {
		lc->extended_save_area_addr = (__u32)
			__alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
		/* enable extended save area */
		__ctl_set_bit(14, 29);
	}
#else
	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;
	lc->ftrace_func = S390_lowcore.ftrace_func;

	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1UL;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	/* Activate the new lowcore and publish it for CPU 0. */
	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

429 430 431 432 433 434 435 436 437 438
static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

439 440 441 442 443
static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

444 445 446
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
447
	&bss_resource,
448 449 450
};

/*
 * Register the memory chunks and the kernel image with the iomem
 * resource tree.
 *
 * Every non-empty memory chunk that is neither CHUNK_OLDMEM nor
 * CHUNK_CRASHK becomes a resource below iomem_resource. Each kernel
 * image resource (code, data, bss) that starts inside such a chunk is
 * requested as its child; if it extends beyond the chunk, only the part
 * inside the chunk is requested (as a copy) and the remainder is left
 * for a following chunk.
 */
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	struct mem_chunk *chunk;
	int i, j;

	code_resource.start = (unsigned long) &_text;
	code_resource.end = (unsigned long) &_etext - 1;
	data_resource.start = (unsigned long) &_etext;
	data_resource.end = (unsigned long) &_edata - 1;
	bss_resource.start = (unsigned long) &__bss_start;
	bss_resource.end = (unsigned long) &__bss_stop - 1;

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk = &memory_chunk[i];
		if (!chunk->size || chunk->type == CHUNK_OLDMEM ||
		    chunk->type == CHUNK_CRASHK)
			continue;

		res = alloc_bootmem_low(sizeof(*res));
		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
		/*
		 * NOTE(review): the CHUNK_CRASHK test below can never be
		 * true because such chunks are skipped above - confirm
		 * which of the two is intended.
		 */
		if (chunk->type == CHUNK_READ_WRITE ||
		    chunk->type == CHUNK_CRASHK) {
			res->name = "System RAM";
		} else if (chunk->type == CHUNK_READ_ONLY) {
			res->name = "System ROM";
			res->flags |= IORESOURCE_READONLY;
		} else {
			res->name = "reserved";
		}
		res->start = chunk->addr;
		res->end = res->start + chunk->size - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			/* Only resources that start inside this chunk. */
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end <= res->end) {
				request_resource(res, std_res);
				continue;
			}
			/* Crosses the chunk end: split at the boundary. */
			sub_res = alloc_bootmem_low(sizeof(*sub_res));
			*sub_res = *std_res;
			sub_res->end = res->end;
			std_res->start = res->end + 1;
			request_resource(res, sub_res);
		}
	}
}

M
Michael Holzheu 已提交
504 505 506
unsigned long real_memory_size;
EXPORT_SYMBOL_GPL(real_memory_size);

H
Heiko Carstens 已提交
507 508
/*
 * Determine the end of usable memory and the kernel address space
 * layout.
 *
 * Steps:
 *  - for a zfcpdump IPL without oldmem, limit memory to ZFCPDUMP_HSA_SIZE;
 *  - shrink every memory chunk to MAX_ORDER aligned boundaries and
 *    compute real_memory_size as the highest chunk end;
 *  - choose the page table depth and place the vmalloc area at the end
 *    of the kernel address space (VMALLOC_START / VMALLOC_END);
 *  - place the vmemmap array below the vmalloc area;
 *  - make sure memory_end is set and not above vmemmap, and clip the
 *    memory chunk array to 0..memory_end.
 */
static void __init setup_memory_end(void)
{
	const unsigned long align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
	unsigned long vmax, vmalloc_size, tmp;
	int i;

#ifdef CONFIG_ZFCPDUMP
	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
		memory_end = ZFCPDUMP_HSA_SIZE;
		memory_end_set = 1;
	}
#endif
	real_memory_size = 0;
	memory_end &= PAGE_MASK;

	/*
	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
	 * extra checks that HOLES_IN_ZONE would require.
	 */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		struct mem_chunk *chunk = &memory_chunk[i];
		unsigned long start = (chunk->addr + align - 1) & ~(align - 1);
		unsigned long end = (chunk->addr + chunk->size) & ~(align - 1);

		if (start < end) {
			chunk->addr = start;
			chunk->size = end - start;
		} else {
			/* Nothing left after alignment: drop the chunk. */
			memset(chunk, 0, sizeof(*chunk));
		}
		real_memory_size = max(real_memory_size,
				       chunk->addr + chunk->size);
	}

	/* Choose kernel address space layout: 2, 3, or 4 levels. */
#ifdef CONFIG_64BIT
	vmalloc_size = VMALLOC_END ? VMALLOC_END : 128UL << 30;
	tmp = (memory_end ? memory_end : real_memory_size) / PAGE_SIZE;
	tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
	if (tmp > (1UL << 42))
		vmax = 1UL << 53;	/* 4-level kernel page table */
	else
		vmax = 1UL << 42;	/* 3-level kernel page table */
#else
	vmalloc_size = VMALLOC_END ? VMALLOC_END : 96UL << 20;
	vmax = 1UL << 31;		/* 2-level kernel page table */
#endif
	/* vmalloc area is at the end of the kernel address space. */
	VMALLOC_END = vmax;
	VMALLOC_START = vmax - vmalloc_size;

	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
	tmp = VMALLOC_START - tmp * sizeof(struct page);
	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
	vmemmap = (struct page *) tmp;

	/* Take care that memory_end is set and <= vmemmap */
	memory_end = min(memory_end ? memory_end : real_memory_size, tmp);

	/* Fixup memory chunk array to fit into 0..memory_end */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		struct mem_chunk *chunk = &memory_chunk[i];

		if (chunk->addr >= memory_end)
			memset(chunk, 0, sizeof(*chunk));
		else if (chunk->addr + chunk->size > memory_end)
			chunk->size = memory_end - chunk->addr;
	}
}

M
Michael Holzheu 已提交
585 586
/*
 * Store the value returned by paddr_vmcoreinfo_note() in the
 * vmcore_info field of the lowcore, using mem_assign_absolute().
 */
static void __init setup_vmcoreinfo(void)
{
	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
}

M
Michael Holzheu 已提交
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
#ifdef CONFIG_CRASH_DUMP

/*
 * Find suitable location for crashkernel memory
 *
 * @crash_size: size of the area to reserve.
 * @msg:	set to a failure description when 0 is returned.
 *
 * If OLDMEM_BASE is set and crash_size equals OLDMEM_SIZE, the oldmem
 * area is reused. Otherwise the memory chunks are searched from the
 * highest to the lowest index for a read-write chunk that can hold
 * crash_size bytes at its end, such that the resulting base is not
 * below crash_size, ZFCPDUMP_HSA_SIZE_MAX, or the end of the initrd.
 *
 * Returns the base address, or 0 on failure.
 */
static unsigned long __init find_crash_base(unsigned long crash_size,
					    char **msg)
{
	unsigned long initrd_end_addr, crash_base;
	struct mem_chunk *chunk;
	int i;

	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return 0;
	}
	if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
		return OLDMEM_BASE;

	initrd_end_addr = (unsigned long) INITRD_START + INITRD_SIZE;
	for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
		chunk = &memory_chunk[i];
		if (chunk->size == 0 || chunk->type != CHUNK_READ_WRITE)
			continue;
		if (chunk->size < crash_size)
			continue;
		/* Candidate: the last crash_size bytes of this chunk. */
		crash_base = (chunk->addr + chunk->size) - crash_size;
		if (crash_base < crash_size ||
		    crash_base < ZFCPDUMP_HSA_SIZE_MAX ||
		    crash_base < initrd_end_addr)
			continue;
		return crash_base;
	}
	*msg = "no suitable area found";
	return 0;
}

/*
 * Check if crash_base and crash_size is valid
 *
 * @crash_base: start address of the crashkernel area.
 * @crash_size: size of the crashkernel area.
 * @msg:	set to a failure description when an error is returned.
 *
 * Returns 0 if the area is acceptable, -EINVAL otherwise.
 */
static int __init verify_crash_base(unsigned long crash_base,
				    unsigned long crash_size,
				    char **msg)
{
	unsigned long chunk_end;
	struct mem_chunk *chunk;
	int i;

	/*
	 * Because we do the swap to zero, we must have at least 'crash_size'
	 * bytes free space before crash_base
	 */
	if (crash_base < crash_size) {
		*msg = "crashkernel offset must be greater than size";
		return -EINVAL;
	}

	/* First memory chunk must be at least crash_size */
	if (memory_chunk[0].size < crash_size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return -EINVAL;
	}

	/* Check if we fit into the respective memory chunk */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		chunk = &memory_chunk[i];
		chunk_end = chunk->addr + chunk->size;
		if (chunk->size == 0 || crash_base < chunk->addr ||
		    crash_base >= chunk_end)
			continue;
		/* we have found the memory chunk */
		if (crash_base + crash_size <= chunk_end)
			return 0;
		*msg = "selected memory chunk is too small for "
			"crashkernel memory";
		return -EINVAL;
	}
	*msg = "invalid memory range specified";
	return -EINVAL;
}

/*
 * Reserve kdump memory by creating a memory hole in the mem_chunk array
 *
 * @addr: start address of the hole.
 * @size: size of the hole.
 * @type: chunk type passed on to create_mem_hole() (the callers in this
 *	  file use CHUNK_OLDMEM and CHUNK_CRASHK).
 */
static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
					 int type)
{
	create_mem_hole(memory_chunk, addr, size, type);
}

/*
 * When kdump is enabled, we have to ensure that no memory from
 * the area [0 - crashkernel memory size] and
 * [crashk_res.start - crashk_res.end] is set offline.
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
		return NOTIFY_OK;
	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
		return NOTIFY_OK;
	return NOTIFY_BAD;
}

/* Memory notifier registered by reserve_crashkernel(). */
static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that oldmem, where the dump is stored, is protected
 */
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (!OLDMEM_BASE)
		return;

	reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
	reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
			      CHUNK_OLDMEM);
	if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
	else
		saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 *
 * The "crashkernel=" option is parsed from boot_command_line. Base and
 * size are aligned to KEXEC_CRASH_MEM_ALIGN; if no base was given, one
 * is searched with find_crash_base(). After verification the area is
 * entered into crashk_res, inserted into the iomem resource tree, cut
 * out of the memory chunk array as CHUNK_CRASHK and reported via
 * os_info. On failure the reason is logged and the memory notifier is
 * unregistered again.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	char *msg = NULL;

	if (parse_crashkernel(boot_command_line, memory_end, &crash_size,
			      &crash_base))
		return;
	if (crash_size == 0)
		return;

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!crash_base)
		crash_base = find_crash_base(crash_size, &msg);
	if (!crash_base || verify_crash_base(crash_base, crash_size, &msg))
		goto out_fail;

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
	reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20, memory_end >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
	return;

out_fail:
	pr_info("crashkernel reservation failed: %s\n", msg);
	unregister_memory_notifier(&kdump_mem_nb);
#endif
}

771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
/*
 * Set the storage key of every page in [start, end) to PAGE_DEFAULT_KEY.
 *
 * @start: first address of the range.
 * @end:   address following the range.
 *
 * Where the machine supports it and the range reaches the next 2GB
 * (EDAT2) or 1MB (EDAT1) boundary, keys are set for whole frames with
 * pfmf(); everything else is handled page by page.
 */
static void __init init_storage_keys(unsigned long start, unsigned long end)
{
	unsigned long boundary, function, size;

	while (start < end) {
		if (MACHINE_HAS_EDAT2) {
			/* set storage keys for a 2GB frame */
			function = 0x22000 | PAGE_DEFAULT_KEY;
			size = 1UL << 31;
			/* next 2GB boundary above start */
			boundary = (start + size) & ~(size - 1);
			if (boundary <= end) {
				/*
				 * NOTE(review): pfmf() presumably returns the
				 * address to continue with - confirm against
				 * its definition.
				 */
				do {
					start = pfmf(function, start);
				} while (start < boundary);
				continue;
			}
		}
		if (MACHINE_HAS_EDAT1) {
			/* set storage keys for a 1MB frame */
			function = 0x21000 | PAGE_DEFAULT_KEY;
			size = 1UL << 20;
			/* next 1MB boundary above start */
			boundary = (start + size) & ~(size - 1);
			if (boundary <= end) {
				do {
					start = pfmf(function, start);
				} while (start < boundary);
				continue;
			}
		}
		/* fallback: one page at a time */
		page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
		start += PAGE_SIZE;
	}
}

M
Martin Schwidefsky 已提交
805
/*
 * Set up the boot-time memory allocator.
 *
 * Steps:
 *  - move the initrd out of the way if the bootmem bitmap would
 *    overwrite it (or disable it if it does not fit below memory_end);
 *  - initialize bootmem for the page frames between the end of the
 *    kernel image and memory_end;
 *  - register all read-write and crashkernel memory chunks and set
 *    their storage keys;
 *  - reserve the lowcore/command line/kernel image, the bootmem bitmap,
 *    the crashkernel area, the ELF core header (kdump kernel) and the
 *    initrd.
 */
static void __init setup_memory(void)
{
	unsigned long bootmap_size;
	unsigned long start_pfn, end_pfn;
	int i;

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	start_pfn = PFN_UP(__pa(&_end));
	end_pfn = max_pfn = PFN_DOWN(memory_end);

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Move the initrd in case the bitmap of the bootmem allocator
	 * would overwrite it.
	 */

	if (INITRD_START && INITRD_SIZE) {
		unsigned long bmap_size;
		unsigned long start;

		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
		bmap_size = PFN_PHYS(bmap_size);

		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;

#ifdef CONFIG_CRASH_DUMP
			if (OLDMEM_BASE) {
				/* Move initrd behind kdump oldmem */
				if (start + INITRD_SIZE > OLDMEM_BASE &&
				    start < OLDMEM_BASE + OLDMEM_SIZE)
					start = OLDMEM_BASE + OLDMEM_SIZE;
			}
#endif
			if (start + INITRD_SIZE > memory_end) {
				pr_err("initrd extends beyond end of "
				       "memory (0x%08lx > 0x%08lx) "
				       "disabling initrd\n",
				       start + INITRD_SIZE, memory_end);
				INITRD_START = INITRD_SIZE = 0;
			} else {
				pr_info("Moving initrd (0x%08lx -> "
					"0x%08lx, size: %ld)\n",
					INITRD_START, start, INITRD_SIZE);
				memmove((void *) start, (void *) INITRD_START,
					INITRD_SIZE);
				INITRD_START = start;
			}
		}
	}
#endif

	/*
	 * Initialize the boot-time allocator
	 */
	bootmap_size = init_bootmem(start_pfn, end_pfn);

	/*
	 * Register RAM areas with the bootmem allocator.
	 * The loop stops at the first empty chunk.
	 */

	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		unsigned long start_chunk, end_chunk, pfn;

		if (memory_chunk[i].type != CHUNK_READ_WRITE &&
		    memory_chunk[i].type != CHUNK_CRASHK)
			continue;
		start_chunk = PFN_DOWN(memory_chunk[i].addr);
		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
		end_chunk = min(end_chunk, end_pfn);
		if (start_chunk >= end_chunk)
			continue;
		memblock_add_node(PFN_PHYS(start_chunk),
				  PFN_PHYS(end_chunk - start_chunk), 0);
		/* Storage keys are only set from start_pfn upwards. */
		pfn = max(start_chunk, start_pfn);
		init_storage_keys(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
	}

	psw_set_key(PAGE_DEFAULT_KEY);

	free_bootmem_with_active_regions(0, max_pfn);

	/*
	 * Reserve memory used for lowcore/command line/kernel image.
	 */
	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
	reserve_bootmem((unsigned long)_stext,
			PFN_PHYS(start_pfn) - (unsigned long)_stext,
			BOOTMEM_DEFAULT);
	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
			BOOTMEM_DEFAULT);

#ifdef CONFIG_CRASH_DUMP
	if (crashk_res.start)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);
	if (is_kdump_kernel())
		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
	if (INITRD_START && INITRD_SIZE) {
		if (INITRD_START + INITRD_SIZE <= memory_end) {
			reserve_bootmem(INITRD_START, INITRD_SIZE,
					BOOTMEM_DEFAULT);
			initrd_start = INITRD_START;
			initrd_end = initrd_start + INITRD_SIZE;
		} else {
			/*
			 * Report the real end of the initrd: initrd_start
			 * has not been assigned on this path, so the
			 * message must be based on INITRD_START.
			 */
			pr_err("initrd extends beyond end of "
			       "memory (0x%08lx > 0x%08lx) "
			       "disabling initrd\n",
			       INITRD_START + INITRD_SIZE, memory_end);
			initrd_start = initrd_end = 0;
		}
	}
#endif
}
L
Linus Torvalds 已提交
932

933 934 935 936 937 938
/*
 * Fill in the ELF hardware capability mask (elf_hwcap) and the ELF
 * platform name (elf_platform) for this machine.
 */
static void __init setup_hwcaps(void)
{
	/*
	 * Facility numbers as found in the principles of operation, where
	 * the store facility list numbers bit 1UL<<31 as 0 and bit 1UL<<0
	 * as 31. The position of an entry in this table is the HWCAP bit
	 * that gets set when the facility is present:
	 *   [0] facility  0: instructions named N3, "backported" to
	 *                    esa-mode                    -> HWCAP_S390_ESAN3
	 *   [1] facility  2: z/Architecture mode is active
	 *                                                -> HWCAP_S390_ZARCH
	 *   [2] facility  7: store-facility-list-extended facility
	 *                                                -> HWCAP_S390_STFLE
	 *   [3] facility 17: message-security assist     -> HWCAP_S390_MSA
	 *   [4] facility 19: long-displacement facility  -> HWCAP_S390_LDISP
	 *   [5] facility 21: extended-immediate facility -> HWCAP_S390_EIMM
	 */
	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
	const char *platform;
	struct cpuid cpu_id;
	int bit;

	for (bit = 0; bit < 6; bit++) {
		if (!test_facility(stfl_bits[bit]))
			continue;
		elf_hwcap |= 1UL << bit;
	}

	/*
	 * HWCAP_S390_ETF3EH is bit 8. It needs both facility 22
	 * (extended-translation facility 3) and facility 30
	 * (extended-translation facility 3 enhancement facility).
	 */
	if (test_facility(22)) {
		if (test_facility(30))
			elf_hwcap |= HWCAP_S390_ETF3EH;
	}

	/*
	 * Facilities beyond bit 31 are only reported by
	 * store-facility-list-extended. stfle stores doublewords (8 byte)
	 * with bit 1ULL<<63 as bit 0 and 1ULL<<0 as bit 63; bits 0-31 carry
	 * the same information as stfl. How many doublewords are stored
	 * depends on the number passed to the instruction.
	 *
	 * HWCAP_S390_DFP is bit 6. It is only considered when the STFLE
	 * capability (bit 2) was set above, and needs both facility 42
	 * (decimal floating point) and facility 44 (perform floating point
	 * operation).
	 */
	if (elf_hwcap & (1UL << 2)) {
		if (test_facility(42) && test_facility(44))
			elf_hwcap |= HWCAP_S390_DFP;
	}

	/* Huge page support: HWCAP_S390_HPAGE is bit 7. */
	if (MACHINE_HAS_HPAGE)
		elf_hwcap |= HWCAP_S390_HPAGE;

#if defined(CONFIG_64BIT)
	/*
	 * 64-bit register support for 31-bit processes:
	 * HWCAP_S390_HIGH_GPRS is bit 9 and is set unconditionally.
	 */
	elf_hwcap |= HWCAP_S390_HIGH_GPRS;

	/* Transactional execution support: HWCAP_S390_TE is bit 10. */
	if (test_facility(50)) {
		if (test_facility(73))
			elf_hwcap |= HWCAP_S390_TE;
	}
#endif

	/*
	 * Platform name used for machine types not listed below:
	 * "z900" on 64 bit kernels, "g5" on 31 bit kernels.
	 */
#if defined(CONFIG_64BIT)
	platform = "z900";
#else
	platform = "g5";
#endif

	get_cpu_id(&cpu_id);
	switch (cpu_id.machine) {
	case 0x9672:
		platform = "g5";
		break;
	case 0x2064:
	case 0x2066:
		platform = "z900";
		break;
	case 0x2084:
	case 0x2086:
		platform = "z990";
		break;
	case 0x2094:
	case 0x2096:
		platform = "z9-109";
		break;
	case 0x2097:
	case 0x2098:
		platform = "z10";
		break;
	case 0x2817:
	case 0x2818:
		platform = "z196";
		break;
	}
	strcpy(elf_platform, platform);
}

1037 1038 1039 1040
/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */
L
Linus Torvalds 已提交
1041

M
Martin Schwidefsky 已提交
1042
void __init setup_arch(char **cmdline_p)
1043
{
L
Linus Torvalds 已提交
1044
        /*
1045
         * print what head.S has found out about the machine
L
Linus Torvalds 已提交
1046
         */
1047
#ifndef CONFIG_64BIT
1048 1049 1050
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 31-bit mode\n");
1051
	else if (MACHINE_IS_LPAR)
1052 1053 1054 1055 1056 1057 1058
		pr_info("Linux is running natively in 31-bit mode\n");
	if (MACHINE_HAS_IEEE)
		pr_info("The hardware system has IEEE compatible "
			"floating point units\n");
	else
		pr_info("The hardware system has no IEEE compatible "
			"floating point units\n");
1059
#else /* CONFIG_64BIT */
1060
	if (MACHINE_IS_VM)
1061 1062
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
1063
	else if (MACHINE_IS_KVM)
1064
		pr_info("Linux is running under KVM in 64-bit mode\n");
1065
	else if (MACHINE_IS_LPAR)
1066
		pr_info("Linux is running natively in 64-bit mode\n");
1067
#endif /* CONFIG_64BIT */
1068

1069 1070 1071
	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;
1072

1073
        ROOT_DEV = Root_RAM0;
1074 1075 1076 1077 1078 1079

	init_mm.start_code = PAGE_OFFSET;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

1080 1081 1082 1083 1084
	if (MACHINE_HAS_MVCOS)
		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
	else
		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));

1085 1086
	parse_early_param();

1087
	os_info_init();
1088
	setup_ipl();
H
Heiko Carstens 已提交
1089
	setup_memory_end();
G
Gerald Schaefer 已提交
1090
	setup_addressing_mode();
M
Michael Holzheu 已提交
1091 1092
	reserve_oldmem();
	reserve_crashkernel();
1093 1094
	setup_memory();
	setup_resources();
M
Michael Holzheu 已提交
1095
	setup_vmcoreinfo();
1096 1097
	setup_lowcore();

L
Linus Torvalds 已提交
1098
        cpu_init();
1099
	s390_init_cpu_topology();
L
Linus Torvalds 已提交
1100

1101 1102 1103 1104 1105
	/*
	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
	 */
	setup_hwcaps();

L
Linus Torvalds 已提交
1106 1107 1108 1109 1110 1111 1112
	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
        paging_init();

        /* Setup default console */
	conmode_default();
1113
	set_preferred_console();
M
Michael Holzheu 已提交
1114 1115 1116

	/* Setup zfcpdump support */
	setup_zfcpdump(console_devno);
L
Linus Torvalds 已提交
1117
}