setup.c 28.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4
/*
 *  arch/s390/kernel/setup.c
 *
 *  S390 version
M
Martin Schwidefsky 已提交
5
 *    Copyright (C) IBM Corp. 1999,2012
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15 16
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

17 18 19
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

L
Linus Torvalds 已提交
20 21 22 23
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
T
Tejun Heo 已提交
24
#include <linux/memblock.h>
L
Linus Torvalds 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37 38
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
39
#include <linux/device.h>
40
#include <linux/notifier.h>
41
#include <linux/pfn.h>
H
Hongjie Yang 已提交
42
#include <linux/ctype.h>
43
#include <linux/reboot.h>
44
#include <linux/topology.h>
45
#include <linux/ftrace.h>
M
Michael Holzheu 已提交
46 47 48
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
49
#include <linux/compat.h>
L
Linus Torvalds 已提交
50

M
Michael Holzheu 已提交
51
#include <asm/ipl.h>
L
Linus Torvalds 已提交
52
#include <asm/uaccess.h>
53
#include <asm/facility.h>
L
Linus Torvalds 已提交
54 55 56 57 58
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/irq.h>
59 60
#include <asm/page.h>
#include <asm/ptrace.h>
61
#include <asm/sections.h>
H
Hongjie Yang 已提交
62
#include <asm/ebcdic.h>
63
#include <asm/kvm_virtio.h>
M
Michael Holzheu 已提交
64
#include <asm/diag.h>
65
#include <asm/os_info.h>
M
Martin Schwidefsky 已提交
66
#include "entry.h"
G
Gerald Schaefer 已提交
67

68 69 70 71 72
/*
 * Default PSW mask bits for kernel and user mode. setup_lowcore() uses
 * psw_kernel_bits as the base of every new-PSW mask it installs, and
 * set_amode_primary() rewrites the address-space-control field of both
 * variables when user_mode is PRIMARY_SPACE_MODE.
 */
long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
			  PSW_MASK_EA | PSW_MASK_BA;
long psw_user_bits	= PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
			  PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
			  PSW_MASK_PSTATE | PSW_ASC_HOME;
L
Linus Torvalds 已提交
73

74 75 76 77
/*
 * User copy operations.
 */
struct uaccess_ops uaccess;
78
EXPORT_SYMBOL(uaccess);
79

L
Linus Torvalds 已提交
80 81 82 83
/*
 * Machine setup.
 */
/* Selected console type; changed through the SET_CONSOLE_* macros. */
unsigned int console_mode;
EXPORT_SYMBOL(console_mode);

/* Console device number; -1 while it is not known. */
unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

/* Console subchannel; -1 while it is not known. */
unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/* Hardware capability bits and platform name, filled in by setup_hwcaps(). */
unsigned long elf_hwcap;
char elf_platform[ELF_PLATFORM_SIZE];
94

95
struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
96 97 98

/*
 * memory_end is the upper limit of usable memory. It is taken from the
 * "mem=" parameter (early_parse_mem) or forced for zfcpdump, and is then
 * page aligned and clamped in setup_memory_end(). memory_end_set is set
 * to 1 whenever memory_end was given explicitly.
 */
int __initdata memory_end_set;
unsigned long __initdata memory_end;
L
Linus Torvalds 已提交
99

100 101 102 103 104 105 106 107 108
/*
 * Kernel address space layout. VMALLOC_END may be preloaded with the
 * requested vmalloc size by the "vmalloc=" parameter (parse_vmalloc);
 * setup_memory_end() then computes the final values of all three.
 */
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);

F
Frank Munzert 已提交
109 110 111 112
/* An array with a pointer to the lowcore of every CPU. */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

L
Linus Torvalds 已提交
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
/*
 * This is set up by the setup-routine at boot-time
 * for S390 need to find out, what we have to setup
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

/*
 * Parse the "condev=" parameter. A device number in the range
 * 0..65535 becomes the console device and resets console_irq to -1;
 * any other value is ignored. Always returns 1.
 */
static int __init condev_setup(char *str)
{
	int devno = simple_strtoul(str, &str, 0);

	if (devno < 0 || devno >= 65536)
		return 1;
	console_devno = devno;
	console_irq = -1;
	return 1;
}

__setup("condev=", condev_setup);

139 140
/*
 * Register the preferred console that matches the machine type and the
 * selected console mode. Nothing is registered if no condition matches.
 */
static void __init set_preferred_console(void)
{
	char *name = NULL;

	if (MACHINE_IS_KVM)
		name = "hvc";
	else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		name = "ttyS";
	else if (CONSOLE_IS_3270)
		name = "tty3270";

	if (name)
		add_preferred_console(name, 0, NULL);
}

L
Linus Torvalds 已提交
149 150
/*
 * Parse the "conmode=" parameter. The recognised values are "hwc" and
 * "sclp", "3215" and "3270", each only when the matching console driver
 * is configured. The preferred console is updated afterwards in every
 * case. Always returns 1.
 */
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

/*
 * Choose the default console mode.
 *
 * Under z/VM the console device number and subchannel are taken from the
 * response to "QUERY CONSOLE", and the console type from the CONMODE
 * field of "QUERY TERM". The requested type is used when its driver is
 * configured; otherwise the next configured driver is chosen. On all
 * other machines the SCLP console is selected if it is configured.
 */
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		/*
		 * Leave console_irq untouched if the response carries no
		 * subchannel instead of parsing from a bogus address.
		 */
		if (ptr)
			console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (strncmp(ptr + 8, "3270", 4) == 0) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

219
#ifdef CONFIG_ZFCPDUMP
/*
 * Prepare the kernel command line for an FCP dump IPL.
 *
 * Does nothing unless the IPL type is IPL_TYPE_FCP_DUMP and OLDMEM_BASE
 * is zero. Otherwise a "cio_ignore=" option is appended to
 * boot_command_line that ignores all devices except the IPL device and,
 * if @console_devno is not -1, the console device. console_loglevel is
 * lowered to 2.
 *
 * NOTE(review): the strcat() into boot_command_line is not bounded here;
 * confirm the command line always has room for the appended option.
 */
static void __init setup_zfcpdump(unsigned int console_devno)
{
	static char str[41];

	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
		return;
	if (OLDMEM_BASE)
		return;
	/* %04x is only a minimum width, so bound the output explicitly. */
	if (console_devno != -1)
		snprintf(str, sizeof(str),
			 " cio_ignore=all,!0.0.%04x,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno, console_devno);
	else
		snprintf(str, sizeof(str), " cio_ignore=all,!0.0.%04x",
			 ipl_info.data.fcp.dev_id.devno);
	strcat(boot_command_line, str);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(unsigned int console_devno) {}
#endif /* CONFIG_ZFCPDUMP */

L
Linus Torvalds 已提交
241 242 243 244 245 246 247
/*
 * Reboot, halt and power_off stubs. They unblank the console when that
 * is safe and then call _machine_restart, _machine_halt or
 * _machine_power_off.
 */

void machine_restart(char *command)
{
248
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
249 250 251 252 253
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
254 255 256 257 258
	_machine_restart(command);
}

void machine_halt(void)
{
259 260 261 262 263 264
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
265 266 267 268 269
	_machine_halt();
}

void machine_power_off(void)
{
270 271 272 273 274 275
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
L
Linus Torvalds 已提交
276 277 278
	_machine_power_off();
}

279 280 281 282 283
/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;

284 285 286
/*
 * Parse the "mem=" early parameter into memory_end and record that an
 * explicit limit was given.
 *
 * Returns 0 on success, or -EINVAL when the parameter has no value
 * (same convention as parse_vmalloc()).
 */
static int __init early_parse_mem(char *p)
{
	/* A bare "mem" without "=<size>" arrives as a NULL string. */
	if (!p)
		return -EINVAL;
	memory_end = memparse(p, &p);
	memory_end_set = 1;
	return 0;
}
early_param("mem", early_parse_mem);

292 293 294 295 296 297 298 299 300
/*
 * Parse the "vmalloc=" early parameter. The requested size is rounded
 * up to a page boundary and kept in VMALLOC_END until setup_memory_end()
 * picks it up.
 *
 * Returns 0 on success, or -EINVAL when the parameter has no value.
 */
static int __init parse_vmalloc(char *arg)
{
	unsigned long long size;

	if (arg == NULL)
		return -EINVAL;

	size = memparse(arg, &arg);
	VMALLOC_END = (size + PAGE_SIZE - 1) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

301 302
/*
 * Address space mode used for user space. Defaults to the home space and
 * can be switched to the primary space with the "switch_amode" or
 * "user_mode=primary" parameters; evaluated in setup_addressing_mode().
 */
unsigned int user_mode = HOME_SPACE_MODE;
EXPORT_SYMBOL_GPL(user_mode);
G
Gerald Schaefer 已提交
303

304
static int set_amode_primary(void)
G
Gerald Schaefer 已提交
305
{
306 307
	psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME;
	psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY;
G
Gerald Schaefer 已提交
308
#ifdef CONFIG_COMPAT
309 310
	psw32_user_bits =
		(psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY;
G
Gerald Schaefer 已提交
311 312 313 314
#endif

	if (MACHINE_HAS_MVCOS) {
		memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
315
		return 1;
G
Gerald Schaefer 已提交
316 317
	} else {
		memcpy(&uaccess, &uaccess_pt, sizeof(uaccess));
318
		return 0;
G
Gerald Schaefer 已提交
319 320 321 322 323 324 325 326
	}
}

/*
 * Switch kernel/user addressing modes?
 */
/*
 * "switch_amode" early parameter: select the primary space for user
 * space. The parameter value @p is not evaluated. Always returns 0.
 */
static int __init early_parse_switch_amode(char *p)
{
	user_mode = PRIMARY_SPACE_MODE;

	return 0;
}
early_param("switch_amode", early_parse_switch_amode);

332
static int __init early_parse_user_mode(char *p)
G
Gerald Schaefer 已提交
333
{
334 335 336 337 338 339
	if (p && strcmp(p, "primary") == 0)
		user_mode = PRIMARY_SPACE_MODE;
	else if (!p || strcmp(p, "home") == 0)
		user_mode = HOME_SPACE_MODE;
	else
		return 1;
340
	return 0;
G
Gerald Schaefer 已提交
341
}
342
early_param("user_mode", early_parse_user_mode);
G
Gerald Schaefer 已提交
343 344 345

static void setup_addressing_mode(void)
{
346
	if (user_mode == PRIMARY_SPACE_MODE) {
347
		if (set_amode_primary())
348 349 350 351 352
			pr_info("Address spaces switched, "
				"mvcos available\n");
		else
			pr_info("Address spaces switched, "
				"mvcos not available\n");
G
Gerald Schaefer 已提交
353 354 355
	}
}

M
Martin Schwidefsky 已提交
356 357 358
/*
 * Top of the stack used for PSW restart; allocated in setup_lowcore().
 * Explicitly placed in the .data section.
 */
void *restart_stack __attribute__((__section__(".data")));

/*
 * Allocate and initialise the lowcore of the boot CPU, then activate it
 * by setting the prefix register. Values that the early boot code has
 * already stored in the current lowcore (machine flags, facility lists,
 * timers, ftrace function) are carried over.
 */
static void __init setup_lowcore(void)
{
	const unsigned long lc_size = LC_PAGES * PAGE_SIZE;
	struct _lowcore *lc;
	void *stack;

	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
	lc = __alloc_bootmem_low(lc_size, lc_size, 0);

	/* New PSWs for restart, interrupts, system calls and checks. */
	lc->restart_psw.mask = psw_kernel_bits;
	lc->restart_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
	lc->external_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->external_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = psw_kernel_bits |
		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) system_call;
	lc->program_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->program_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = psw_kernel_bits;
	lc->mcck_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask =
		psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
	lc->io_new_psw.addr =
		PSW_ADDR_AMODE | (unsigned long) io_int_handler;

	lc->clock_comparator = -1ULL;

	/* Stacks: each field holds the address just past its stack area. */
	lc->kernel_stack = THREAD_SIZE + (unsigned long) &init_thread_union;
	stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
	lc->async_stack = (unsigned long) stack + ASYNC_SIZE;
	stack = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
	lc->panic_stack = (unsigned long) stack + PAGE_SIZE;

	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
	lc->thread_info = (unsigned long) &init_thread_union;

	/* Carry over what early boot stored in the current lowcore. */
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       MAX_FACILITY_BIT/8);
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE) {
		lc->extended_save_area_addr = (__u32)
			__alloc_bootmem_low(PAGE_SIZE, PAGE_SIZE, 0);
		/* enable extended save area */
		__ctl_set_bit(14, 29);
	}
#else
	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
	lc->async_enter_timer = S390_lowcore.async_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;
	lc->ftrace_func = S390_lowcore.ftrace_func;

	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
	restart_stack += ASYNC_SIZE;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1UL;
	/* Copies four words starting at restart_stack in a single call. */
	memcpy(&S390_lowcore.restart_stack, &lc->restart_stack,
	       4*sizeof(unsigned long));
	copy_to_absolute_zero(&S390_lowcore.restart_psw,
			      &lc->restart_psw, sizeof(psw_t));

	/* Activate the new lowcore and publish it for CPU 0. */
	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

440 441 442 443 444 445 446 447 448 449
static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

450 451 452 453 454
static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
};

455 456 457
static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
458
	&bss_resource,
459 460 461
};

/*
 * Register the memory chunks and the kernel image in the iomem resource
 * tree.
 *
 * Every non-empty chunk that is neither CHUNK_OLDMEM nor CHUNK_CRASHK
 * gets its own resource below iomem_resource. The kernel code, data and
 * bss resources are then inserted below the chunk they start in; a
 * resource that extends past the end of a chunk is split, and the
 * remainder is handled with the following chunk.
 */
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	int i, j;

	code_resource.start = (unsigned long) &_text;
	code_resource.end = (unsigned long) &_etext - 1;
	data_resource.start = (unsigned long) &_etext;
	data_resource.end = (unsigned long) &_edata - 1;
	bss_resource.start = (unsigned long) &__bss_start;
	bss_resource.end = (unsigned long) &__bss_stop - 1;

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		if (!memory_chunk[i].size)
			continue;
		if (memory_chunk[i].type == CHUNK_OLDMEM ||
		    memory_chunk[i].type == CHUNK_CRASHK)
			continue;
		res = alloc_bootmem_low(sizeof(*res));
		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
		/* CHUNK_CRASHK cannot reach this point, see above. */
		switch (memory_chunk[i].type) {
		case CHUNK_READ_WRITE:
			res->name = "System RAM";
			break;
		case CHUNK_READ_ONLY:
			res->name = "System ROM";
			res->flags |= IORESOURCE_READONLY;
			break;
		default:
			res->name = "reserved";
		}
		res->start = memory_chunk[i].addr;
		res->end = res->start + memory_chunk[i].size - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			/* Only resources that start inside this chunk. */
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				/*
				 * Register the part inside this chunk as a
				 * copy and move the start of the original
				 * behind the chunk.
				 */
				sub_res = alloc_bootmem_low(sizeof(*sub_res));
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
}

M
Michael Holzheu 已提交
515 516 517
/*
 * Highest end address of any memory chunk after alignment; computed in
 * setup_memory_end() before the chunks are clipped to memory_end.
 */
unsigned long real_memory_size;
EXPORT_SYMBOL_GPL(real_memory_size);

H
Heiko Carstens 已提交
518 519
/*
 * Determine the final memory_end and the kernel address space layout.
 *
 * The memory chunks are aligned first and real_memory_size is derived
 * from them. Then the size of the kernel address space, the vmalloc
 * area and the location of the vmemmap array are chosen, memory_end is
 * limited accordingly, and the chunk array is clipped to 0..memory_end.
 */
static void __init setup_memory_end(void)
{
	const unsigned long align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
	unsigned long vmax, vmalloc_size, tmp;
	struct mem_chunk *chunk;

#ifdef CONFIG_ZFCPDUMP
	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
		memory_end = ZFCPDUMP_HSA_SIZE;
		memory_end_set = 1;
	}
#endif
	real_memory_size = 0;
	memory_end &= PAGE_MASK;

	/*
	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
	 * extra checks that HOLES_IN_ZONE would require.
	 */
	for (chunk = memory_chunk; chunk < memory_chunk + MEMORY_CHUNKS;
	     chunk++) {
		unsigned long start, end;

		start = (chunk->addr + align - 1) & ~(align - 1);
		end = (chunk->addr + chunk->size) & ~(align - 1);
		if (start < end) {
			chunk->addr = start;
			chunk->size = end - start;
		} else {
			/* Nothing is left of this chunk after alignment. */
			memset(chunk, 0, sizeof(*chunk));
		}
		real_memory_size = max(real_memory_size,
				       chunk->addr + chunk->size);
	}

	/* Choose kernel address space layout: 2, 3, or 4 levels. */
#ifdef CONFIG_64BIT
	vmalloc_size = VMALLOC_END ? VMALLOC_END : 128UL << 30;
	tmp = (memory_end ? memory_end : real_memory_size) / PAGE_SIZE;
	tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
	/* 3-level kernel page table if everything fits, else 4-level */
	vmax = (tmp <= (1UL << 42)) ? 1UL << 42 : 1UL << 53;
#else
	vmalloc_size = VMALLOC_END ? VMALLOC_END : 96UL << 20;
	vmax = 1UL << 31;		/* 2-level kernel page table */
#endif
	/* vmalloc area is at the end of the kernel address space. */
	VMALLOC_END = vmax;
	VMALLOC_START = vmax - vmalloc_size;

	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
	tmp = VMALLOC_START - tmp * sizeof(struct page);
	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
	vmemmap = (struct page *) tmp;

	/* Take care that memory_end is set and <= vmemmap */
	memory_end = min(memory_end ? memory_end : real_memory_size, tmp);

	/* Fixup memory chunk array to fit into 0..memory_end */
	for (chunk = memory_chunk; chunk < memory_chunk + MEMORY_CHUNKS;
	     chunk++) {
		if (chunk->addr >= memory_end)
			memset(chunk, 0, sizeof(*chunk));
		else if (chunk->addr + chunk->size > memory_end)
			chunk->size = memory_end - chunk->addr;
	}
}

M
Michael Holzheu 已提交
596 597 598 599 600 601 602 603 604
/*
 * With CONFIG_KEXEC, store the value returned by paddr_vmcoreinfo_note()
 * in the vmcore_info field of the absolute zero lowcore. Does nothing
 * otherwise.
 */
static void __init setup_vmcoreinfo(void)
{
#ifdef CONFIG_KEXEC
	unsigned long note_paddr;

	note_paddr = paddr_vmcoreinfo_note();
	copy_to_absolute_zero(&S390_lowcore.vmcore_info, &note_paddr,
			      sizeof(note_paddr));
#endif
}

M
Michael Holzheu 已提交
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
#ifdef CONFIG_CRASH_DUMP

/*
 * Find suitable location for crashkernel memory
 */
/*
 * Pick a base address for @crash_size bytes of crashkernel memory.
 *
 * If OLDMEM_BASE is set and OLDMEM_SIZE equals @crash_size, OLDMEM_BASE
 * is used. Otherwise the chunks are searched from the last to the first
 * for a read-write chunk whose last @crash_size bytes lie above
 * @crash_size, ZFCPDUMP_HSA_SIZE_MAX and the end of the initrd.
 *
 * Returns the base address, or 0 with *@msg set to the reason.
 */
static unsigned long __init find_crash_base(unsigned long crash_size,
					    char **msg)
{
	unsigned long base;
	int idx = MEMORY_CHUNKS;

	if (crash_size > memory_chunk[0].size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return 0;
	}
	if (OLDMEM_BASE && OLDMEM_SIZE == crash_size)
		return OLDMEM_BASE;

	while (idx-- > 0) {
		struct mem_chunk *chunk = &memory_chunk[idx];

		if (chunk->size == 0 || chunk->type != CHUNK_READ_WRITE ||
		    chunk->size < crash_size)
			continue;
		/* Candidate: the topmost crash_size bytes of the chunk. */
		base = (chunk->addr + chunk->size) - crash_size;
		if (base < crash_size || base < ZFCPDUMP_HSA_SIZE_MAX ||
		    base < (unsigned long) INITRD_START + INITRD_SIZE)
			continue;
		return base;
	}
	*msg = "no suitable area found";
	return 0;
}

/*
 * Check if crash_base and crash_size is valid
 */
/*
 * Validate a crashkernel area of @crash_size bytes at @crash_base.
 *
 * Returns 0 if the area is usable, otherwise -EINVAL with *@msg set to
 * the reason.
 */
static int __init verify_crash_base(unsigned long crash_base,
				    unsigned long crash_size,
				    char **msg)
{
	unsigned long crash_end = crash_base + crash_size;
	int i;

	/*
	 * Because we do the swap to zero, we must have at least 'crash_size'
	 * bytes free space before crash_base
	 */
	if (crash_base < crash_size) {
		*msg = "crashkernel offset must be greater than size";
		return -EINVAL;
	}

	/* First memory chunk must be at least crash_size */
	if (crash_size > memory_chunk[0].size) {
		*msg = "first memory chunk must be at least crashkernel size";
		return -EINVAL;
	}

	/* Look for the chunk that contains crash_base */
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		struct mem_chunk *chunk = &memory_chunk[i];
		unsigned long chunk_end;

		if (chunk->size == 0)
			continue;
		chunk_end = chunk->addr + chunk->size;
		if (crash_base < chunk->addr || crash_base >= chunk_end)
			continue;
		/* Found it; the whole area has to fit into this chunk. */
		if (crash_end <= chunk_end)
			return 0;
		*msg = "selected memory chunk is too small for "
			"crashkernel memory";
		return -EINVAL;
	}
	*msg = "invalid memory range specified";
	return -EINVAL;
}

/*
 * Reserve kdump memory by creating a memory hole in the mem_chunk array.
 *
 * @addr: start address of the hole
 * @size: size of the hole in bytes
 * @type: chunk type passed on to create_mem_hole() (callers in this file
 *	  use CHUNK_OLDMEM and CHUNK_CRASHK)
 */
static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
					 int type)
{
	create_mem_hole(memory_chunk, addr, size, type);
}

/*
 * When kdump is enabled, we have to ensure that no memory from
 * the area [0 - crashkernel memory size] and
 * [crashk_res.start - crashk_res.end] is set offline.
 */
/*
 * Memory notifier callback. @data points to a struct memory_notify that
 * describes the affected page frame range; @nb and @action are not
 * evaluated.
 *
 * Returns NOTIFY_BAD if the range starts below the crashkernel size or
 * overlaps the crashkernel resource, NOTIFY_OK otherwise.
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;
	unsigned long first_pfn = arg->start_pfn;
	unsigned long last_pfn = arg->start_pfn + arg->nr_pages - 1;

	if (first_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	/* Entirely above or entirely below the crashkernel area is fine. */
	if (first_pfn > PFN_DOWN(crashk_res.end) ||
	    last_pfn < PFN_DOWN(crashk_res.start))
		return NOTIFY_OK;
	return NOTIFY_BAD;
}

/* Registered as a memory notifier by reserve_crashkernel(). */
static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that oldmem, where the dump is stored, is protected
 */
static void reserve_oldmem(void)
{
#ifdef CONFIG_CRASH_DUMP
	if (!OLDMEM_BASE)
		return;

	reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
	reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
			      CHUNK_OLDMEM);
	if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
	else
		saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
#endif
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
/*
 * Reserve the memory requested with the crashkernel parameter.
 *
 * Base and size are aligned to KEXEC_CRASH_MEM_ALIGN. If no base was
 * given, one is searched with find_crash_base(). After validation the
 * area is inserted into the iomem resource tree, turned into a
 * CHUNK_CRASHK memory hole and passed to os_info_crashkernel_add(). The
 * memory notifier registered up front is removed again when no valid
 * area is found.
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	char *msg = NULL;

	if (parse_crashkernel(boot_command_line, memory_end, &crash_size,
			      &crash_base) || crash_size == 0)
		return;

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!crash_base)
		crash_base = find_crash_base(crash_size, &msg);
	/* verify_crash_base() only runs once a base address is known. */
	if (!crash_base || verify_crash_base(crash_base, crash_size, &msg)) {
		pr_info("crashkernel reservation failed: %s\n", msg);
		unregister_memory_notifier(&kdump_mem_nb);
		return;
	}

	if (!OLDMEM_BASE && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
	reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20, memory_end >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

M
Martin Schwidefsky 已提交
786
/*
 * Set up the boot-time memory allocator.
 *
 * Steps, in order:
 *  - move the initrd out of the way if the bootmem bitmap would
 *    overwrite it (or disable it if it no longer fits below memory_end),
 *  - initialise the bootmem allocator behind the kernel image,
 *  - register all read-write and crashkernel chunks and set the storage
 *    key of their pages from start_pfn upwards,
 *  - free the registered regions and reserve again what is in use:
 *    lowcore/head, kernel image, the bootmem bitmap, the crashkernel
 *    area, the ELF core header of a kdump kernel and the initrd.
 */
static void __init setup_memory(void)
{
	unsigned long bootmap_size;
	unsigned long start_pfn, end_pfn;
	int i;

	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	start_pfn = PFN_UP(__pa(&_end));
	end_pfn = max_pfn = PFN_DOWN(memory_end);

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Move the initrd in case the bitmap of the bootmem allocator
	 * would overwrite it.
	 */

	if (INITRD_START && INITRD_SIZE) {
		unsigned long bmap_size;
		unsigned long start;

		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
		bmap_size = PFN_PHYS(bmap_size);

		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;

#ifdef CONFIG_CRASH_DUMP
			if (OLDMEM_BASE) {
				/* Move initrd behind kdump oldmem */
				if (start + INITRD_SIZE > OLDMEM_BASE &&
				    start < OLDMEM_BASE + OLDMEM_SIZE)
					start = OLDMEM_BASE + OLDMEM_SIZE;
			}
#endif
			if (start + INITRD_SIZE > memory_end) {
				pr_err("initrd extends beyond end of "
				       "memory (0x%08lx > 0x%08lx) "
				       "disabling initrd\n",
				       start + INITRD_SIZE, memory_end);
				INITRD_START = INITRD_SIZE = 0;
			} else {
				pr_info("Moving initrd (0x%08lx -> "
					"0x%08lx, size: %ld)\n",
					INITRD_START, start, INITRD_SIZE);
				memmove((void *) start, (void *) INITRD_START,
					INITRD_SIZE);
				INITRD_START = start;
			}
		}
	}
#endif

	/*
	 * Initialize the boot-time allocator
	 */
	bootmap_size = init_bootmem(start_pfn, end_pfn);

	/*
	 * Register RAM areas with the bootmem allocator.
	 */

	for (i = 0; i < MEMORY_CHUNKS; i++) {
		unsigned long start_chunk, end_chunk, pfn;

		/*
		 * Skip empty entries instead of stopping at the first one:
		 * setup_memory_end() can zero a chunk in the middle of the
		 * array and valid chunks may follow it.
		 */
		if (!memory_chunk[i].size)
			continue;
		if (memory_chunk[i].type != CHUNK_READ_WRITE &&
		    memory_chunk[i].type != CHUNK_CRASHK)
			continue;
		start_chunk = PFN_DOWN(memory_chunk[i].addr);
		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
		end_chunk = min(end_chunk, end_pfn);
		if (start_chunk >= end_chunk)
			continue;
		memblock_add_node(PFN_PHYS(start_chunk),
				  PFN_PHYS(end_chunk - start_chunk), 0);
		/* Storage keys are only set from start_pfn upwards. */
		pfn = max(start_chunk, start_pfn);
		for (; pfn < end_chunk; pfn++)
			page_set_storage_key(PFN_PHYS(pfn),
					     PAGE_DEFAULT_KEY, 0);
	}

	psw_set_key(PAGE_DEFAULT_KEY);

	free_bootmem_with_active_regions(0, max_pfn);

	/*
	 * Reserve memory used for lowcore/command line/kernel image.
	 */
	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
	reserve_bootmem((unsigned long)_stext,
			PFN_PHYS(start_pfn) - (unsigned long)_stext,
			BOOTMEM_DEFAULT);
	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
			BOOTMEM_DEFAULT);

#ifdef CONFIG_CRASH_DUMP
	if (crashk_res.start)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);
	if (is_kdump_kernel())
		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
	if (INITRD_START && INITRD_SIZE) {
		if (INITRD_START + INITRD_SIZE <= memory_end) {
			reserve_bootmem(INITRD_START, INITRD_SIZE,
					BOOTMEM_DEFAULT);
			initrd_start = INITRD_START;
			initrd_end = initrd_start + INITRD_SIZE;
		} else {
			/*
			 * initrd_start has not been assigned on this path,
			 * so report the end address from INITRD_START.
			 */
			pr_err("initrd extends beyond end of "
			       "memory (0x%08lx > 0x%08lx) "
			       "disabling initrd\n",
			       INITRD_START + INITRD_SIZE, memory_end);
			initrd_start = initrd_end = 0;
		}
	}
#endif
}
L
Linus Torvalds 已提交
915

916 917 918 919 920 921
/*
 * Setup hardware capabilities.
 */
/*
 * Fill in elf_hwcap from the installed facilities and machine features,
 * and derive elf_platform from the machine type of the CPU id.
 */
static void __init setup_hwcaps(void)
{
	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
	struct cpuid cpu_id;
	const char *platform;
	unsigned int bit;

	/*
	 * The store facility list bits numbers as found in the principles
	 * of operation are numbered with bit 1UL<<31 as number 0 to
	 * bit 1UL<<0 as number 31.
	 *   Bit 0: instructions named N3, "backported" to esa-mode
	 *   Bit 2: z/Architecture mode is active
	 *   Bit 7: the store-facility-list-extended facility is installed
	 *   Bit 17: the message-security assist is installed
	 *   Bit 19: the long-displacement facility is installed
	 *   Bit 21: the extended-immediate facility is installed
	 *   Bit 22: extended-translation facility 3 is installed
	 *   Bit 30: extended-translation facility 3 enhancement facility
	 * These get translated to:
	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
	 */
	for (bit = 0; bit < ARRAY_SIZE(stfl_bits); bit++) {
		if (test_facility(stfl_bits[bit]))
			elf_hwcap |= 1UL << bit;
	}

	if (test_facility(22) && test_facility(30))
		elf_hwcap |= HWCAP_S390_ETF3EH;

	/*
	 * Check for additional facilities with store-facility-list-extended.
	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
	 * as stored by stfl, bits 32-xxx contain additional facilities.
	 * How many facility words are stored depends on the number of
	 * doublewords passed to the instruction. The additional facilities
	 * are:
	 *   Bit 42: decimal floating point facility is installed
	 *   Bit 44: perform floating point operation facility is installed
	 * translated to:
	 *   HWCAP_S390_DFP bit 6 (42 && 44).
	 */
	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
		elf_hwcap |= HWCAP_S390_DFP;

	/*
	 * Huge page support HWCAP_S390_HPAGE is bit 7.
	 */
	if (MACHINE_HAS_HPAGE)
		elf_hwcap |= HWCAP_S390_HPAGE;

	/*
	 * 64-bit register support for 31-bit processes
	 * HWCAP_S390_HIGH_GPRS is bit 9.
	 */
	elf_hwcap |= HWCAP_S390_HIGH_GPRS;

	/* Map the machine type to a platform name and store it once. */
	get_cpu_id(&cpu_id);
	switch (cpu_id.machine) {
	case 0x9672:
#if !defined(CONFIG_64BIT)
	default:	/* Use "g5" as default for 31 bit kernels. */
#endif
		platform = "g5";
		break;
	case 0x2064:
	case 0x2066:
#if defined(CONFIG_64BIT)
	default:	/* Use "z900" as default for 64 bit kernels. */
#endif
		platform = "z900";
		break;
	case 0x2084:
	case 0x2086:
		platform = "z990";
		break;
	case 0x2094:
	case 0x2096:
		platform = "z9-109";
		break;
	case 0x2097:
	case 0x2098:
		platform = "z10";
		break;
	case 0x2817:
	case 0x2818:
		platform = "z196";
		break;
	}
	strcpy(elf_platform, platform);
}

1012 1013 1014 1015
/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */
L
Linus Torvalds 已提交
1016

M
Martin Schwidefsky 已提交
1017
void __init setup_arch(char **cmdline_p)
1018
{
L
Linus Torvalds 已提交
1019
        /*
1020
         * print what head.S has found out about the machine
L
Linus Torvalds 已提交
1021
         */
1022
#ifndef CONFIG_64BIT
1023 1024 1025
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 31-bit mode\n");
1026
	else if (MACHINE_IS_LPAR)
1027 1028 1029 1030 1031 1032 1033
		pr_info("Linux is running natively in 31-bit mode\n");
	if (MACHINE_HAS_IEEE)
		pr_info("The hardware system has IEEE compatible "
			"floating point units\n");
	else
		pr_info("The hardware system has no IEEE compatible "
			"floating point units\n");
1034
#else /* CONFIG_64BIT */
1035
	if (MACHINE_IS_VM)
1036 1037
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
1038
	else if (MACHINE_IS_KVM)
1039
		pr_info("Linux is running under KVM in 64-bit mode\n");
1040
	else if (MACHINE_IS_LPAR)
1041
		pr_info("Linux is running natively in 64-bit mode\n");
1042
#endif /* CONFIG_64BIT */
1043

1044 1045 1046
	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;
1047

1048
        ROOT_DEV = Root_RAM0;
1049 1050 1051 1052 1053 1054

	init_mm.start_code = PAGE_OFFSET;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

1055 1056 1057 1058 1059
	if (MACHINE_HAS_MVCOS)
		memcpy(&uaccess, &uaccess_mvcos, sizeof(uaccess));
	else
		memcpy(&uaccess, &uaccess_std, sizeof(uaccess));

1060 1061
	parse_early_param();

1062
	os_info_init();
1063
	setup_ipl();
H
Heiko Carstens 已提交
1064
	setup_memory_end();
G
Gerald Schaefer 已提交
1065
	setup_addressing_mode();
M
Michael Holzheu 已提交
1066 1067
	reserve_oldmem();
	reserve_crashkernel();
1068 1069
	setup_memory();
	setup_resources();
M
Michael Holzheu 已提交
1070
	setup_vmcoreinfo();
1071 1072
	setup_lowcore();

L
Linus Torvalds 已提交
1073
        cpu_init();
1074
	s390_init_cpu_topology();
L
Linus Torvalds 已提交
1075

1076 1077 1078 1079 1080
	/*
	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
	 */
	setup_hwcaps();

L
Linus Torvalds 已提交
1081 1082 1083 1084 1085 1086 1087
	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
        paging_init();

        /* Setup default console */
	conmode_default();
1088
	set_preferred_console();
M
Michael Holzheu 已提交
1089 1090 1091

	/* Setup zfcpdump support */
	setup_zfcpdump(console_devno);
L
Linus Torvalds 已提交
1092
}