/*
 *  arch/s390/kernel/smp.c
 *
 *    Copyright IBM Corp. 1999, 2009
 *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
 *		 Martin Schwidefsky (schwidefsky@de.ibm.com)
 *		 Heiko Carstens (heiko.carstens@de.ibm.com)
 *
 *  based on other smp stuff by
 *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 *    (c) 1998 Ingo Molnar
 *
 * We work with logical cpu numbering everywhere we can. The only
 * functions using the real cpu address (got from STAP) are the sigp
 * functions. For all other functions we use the identity mapping.
 * That means that cpu_number_map[i] == i for every cpu. cpu_number_map is
 * used e.g. to find the idle task belonging to a logical cpu. Every array
 * in the kernel is sorted by the logical cpu number and not by the physical
 * one, which is what causes all the confusion with __cpu_logical_map and
 * cpu_number_map in other architectures.
 */

23 24 25
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

L
Linus Torvalds 已提交
26 27 28
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
A
Alexey Dobriyan 已提交
29
#include <linux/err.h>
L
Linus Torvalds 已提交
30 31 32 33 34
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
35
#include <linux/irqflags.h>
L
Linus Torvalds 已提交
36
#include <linux/cpu.h>
37
#include <linux/timex.h>
M
Michael Holzheu 已提交
38
#include <linux/bootmem.h>
39
#include <asm/asm-offsets.h>
M
Michael Holzheu 已提交
40
#include <asm/ipl.h>
41
#include <asm/setup.h>
L
Linus Torvalds 已提交
42 43 44 45 46 47
#include <asm/sigp.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/s390_ext.h>
#include <asm/cpcmd.h>
#include <asm/tlbflush.h>
48
#include <asm/timer.h>
M
Michael Holzheu 已提交
49
#include <asm/lowcore.h>
50
#include <asm/sclp.h>
51
#include <asm/cputime.h>
52
#include <asm/vdso.h>
53
#include <asm/cpu.h>
54
#include "entry.h"
L
Linus Torvalds 已提交
55

56
/* logical cpu to cpu address */
H
Heiko Carstens 已提交
57
unsigned short __cpu_logical_map[NR_CPUS];
58

L
Linus Torvalds 已提交
59 60
static struct task_struct *current_set[NR_CPUS];

61 62 63 64 65 66 67 68
static u8 smp_cpu_type;
static int smp_use_sigp_detection;

enum s390_cpu_state {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};

69
DEFINE_MUTEX(smp_cpu_state_mutex);
H
Heiko Carstens 已提交
70
int smp_cpu_polarization[NR_CPUS];
71
static int smp_cpu_state[NR_CPUS];
H
Heiko Carstens 已提交
72
static int cpu_management;
73 74 75

static DEFINE_PER_CPU(struct cpu, cpu_devices);

H
Heiko Carstens 已提交
76
static void smp_ext_bitcall(int, int);
L
Linus Torvalds 已提交
77

H
Heiko Carstens 已提交
78
static int raw_cpu_stopped(int cpu)
79
{
H
Heiko Carstens 已提交
80
	u32 status;
81

H
Heiko Carstens 已提交
82
	switch (raw_sigp_ps(&status, 0, cpu, sigp_sense)) {
83 84 85 86 87 88 89 90 91 92 93
	case sigp_status_stored:
		/* Check for stopped and check stop state */
		if (status & 0x50)
			return 1;
		break;
	default:
		break;
	}
	return 0;
}

H
Heiko Carstens 已提交
94 95 96 97 98
/* Like raw_cpu_stopped(), but takes a logical cpu number. */
static inline int cpu_stopped(int cpu)
{
	int cpu_addr = cpu_logical_map(cpu);

	return raw_cpu_stopped(cpu_addr);
}

99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
void smp_switch_to_ipl_cpu(void (*func)(void *), void *data)
{
	struct _lowcore *lc, *current_lc;
	struct stack_frame *sf;
	struct pt_regs *regs;
	unsigned long sp;

	if (smp_processor_id() == 0)
		func(data);
	__load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY);
	/* Disable lowcore protection */
	__ctl_clear_bit(0, 28);
	current_lc = lowcore_ptr[smp_processor_id()];
	lc = lowcore_ptr[0];
	if (!lc)
		lc = current_lc;
	lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
	lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu;
	if (!cpu_online(0))
		smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]);
H
Heiko Carstens 已提交
119
	while (sigp(0, sigp_stop_and_store_status) == sigp_busy)
120 121 122 123 124
		cpu_relax();
	sp = lc->panic_stack;
	sp -= sizeof(struct pt_regs);
	regs = (struct pt_regs *) sp;
	memcpy(&regs->gprs, &current_lc->gpregs_save_area, sizeof(regs->gprs));
125
	regs->psw = lc->psw_save_area;
126 127 128 129 130 131
	sp -= STACK_FRAME_OVERHEAD;
	sf = (struct stack_frame *) sp;
	sf->back_chain = regs->gprs[15];
	smp_switch_to_cpu(func, data, sp, stap(), __cpu_logical_map[0]);
}

132
void smp_send_stop(void)
L
Linus Torvalds 已提交
133
{
134
	int cpu, rc;
L
Linus Torvalds 已提交
135

136 137
	/* Disable all interrupts/machine checks */
	__load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
138
	trace_hardirqs_off();
L
Linus Torvalds 已提交
139

140
	/* stop all processors */
L
Linus Torvalds 已提交
141 142 143 144
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		do {
H
Heiko Carstens 已提交
145
			rc = sigp(cpu, sigp_stop);
146
		} while (rc == sigp_busy);
L
Linus Torvalds 已提交
147

148
		while (!cpu_stopped(cpu))
H
Heiko Carstens 已提交
149 150 151 152
			cpu_relax();
	}
}

L
Linus Torvalds 已提交
153 154 155 156 157
/*
 * This is the main routine where commands issued by other
 * cpus are handled.
 */

158
static void do_ext_call_interrupt(__u16 code)
L
Linus Torvalds 已提交
159
{
160
	unsigned long bits;
L
Linus Torvalds 已提交
161

162 163 164 165 166 167
	/*
	 * handle bit signal external calls
	 *
	 * For the ec_schedule signal we have to do nothing. All the work
	 * is done automatically when we return from the interrupt.
	 */
L
Linus Torvalds 已提交
168 169
	bits = xchg(&S390_lowcore.ext_call_fast, 0);

170
	if (test_bit(ec_call_function, &bits))
171 172 173 174
		generic_smp_call_function_interrupt();

	if (test_bit(ec_call_function_single, &bits))
		generic_smp_call_function_single_interrupt();
L
Linus Torvalds 已提交
175 176 177 178 179 180
}

/*
 * Send an external call sigp to another cpu and return without waiting
 * for its completion.
 */
H
Heiko Carstens 已提交
181
static void smp_ext_bitcall(int cpu, int sig)
L
Linus Torvalds 已提交
182
{
183 184 185
	/*
	 * Set signaling bit in lowcore of target cpu and kick it
	 */
L
Linus Torvalds 已提交
186
	set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast);
H
Heiko Carstens 已提交
187
	while (sigp(cpu, sigp_emergency_signal) == sigp_busy)
L
Linus Torvalds 已提交
188 189 190
		udelay(10);
}

191
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
192 193 194
{
	int cpu;

195
	for_each_cpu(cpu, mask)
196 197 198 199 200 201 202 203
		smp_ext_bitcall(cpu, ec_call_function);
}

/* Raise the ec_call_function_single signal on logical cpu @cpu. */
void arch_send_call_function_single_ipi(int cpu)
{
	smp_ext_bitcall(cpu, ec_call_function_single);
}

204
#ifndef CONFIG_64BIT
L
Linus Torvalds 已提交
205 206 207
/*
 * this function sends a 'purge tlb' signal to another CPU.
 */
208
static void smp_ptlb_callback(void *info)
L
Linus Torvalds 已提交
209
{
M
Martin Schwidefsky 已提交
210
	__tlb_flush_local();
L
Linus Torvalds 已提交
211 212 213 214
}

void smp_ptlb_all(void)
{
215
	on_each_cpu(smp_ptlb_callback, NULL, 1);
L
Linus Torvalds 已提交
216 217
}
EXPORT_SYMBOL(smp_ptlb_all);
218
#endif /* ! CONFIG_64BIT */
L
Linus Torvalds 已提交
219 220 221 222 223 224 225 226

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
227
	smp_ext_bitcall(cpu, ec_schedule);
L
Linus Torvalds 已提交
228 229 230 231 232
}

/*
 * Parameter area for the set/clear control bit callbacks: for each of
 * the 16 control registers, the new value is (old & andvals) | orvals.
 */
struct ec_creg_mask_parms {
	unsigned long orvals[16];	/* bits to switch on */
	unsigned long andvals[16];	/* bits to keep */
};
L
Linus Torvalds 已提交
237 238 239 240

/*
 * callback for setting/clearing control bits
 */
241 242
static void smp_ctl_bit_callback(void *info)
{
243
	struct ec_creg_mask_parms *pp = info;
L
Linus Torvalds 已提交
244 245
	unsigned long cregs[16];
	int i;
246

247 248
	__ctl_store(cregs, 0, 15);
	for (i = 0; i <= 15; i++)
L
Linus Torvalds 已提交
249
		cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
250
	__ctl_load(cregs, 0, 15);
L
Linus Torvalds 已提交
251 252 253 254 255
}

/*
 * Set a bit in a control register of all cpus
 */
256 257 258
void smp_ctl_set_bit(int cr, int bit)
{
	struct ec_creg_mask_parms parms;
L
Linus Torvalds 已提交
259

260 261
	memset(&parms.orvals, 0, sizeof(parms.orvals));
	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
L
Linus Torvalds 已提交
262
	parms.orvals[cr] = 1 << bit;
263
	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
L
Linus Torvalds 已提交
264
}
265
EXPORT_SYMBOL(smp_ctl_set_bit);
L
Linus Torvalds 已提交
266 267 268 269

/*
 * Clear a bit in a control register of all cpus
 */
270 271 272
void smp_ctl_clear_bit(int cr, int bit)
{
	struct ec_creg_mask_parms parms;
L
Linus Torvalds 已提交
273

274 275
	memset(&parms.orvals, 0, sizeof(parms.orvals));
	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
L
Linus Torvalds 已提交
276
	parms.andvals[cr] = ~(1L << bit);
277
	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
L
Linus Torvalds 已提交
278
}
279
EXPORT_SYMBOL(smp_ctl_clear_bit);
L
Linus Torvalds 已提交
280

281
#ifdef CONFIG_ZFCPDUMP
M
Michael Holzheu 已提交
282

283
/*
 * Capture the stored status of a cpu for a zfcpdump IPL.
 *
 * @cpu:     slot in zfcpdump_save_areas[] to fill
 * @phy_cpu: cpu address handed to raw_sigp()
 *
 * Does nothing unless the system was IPLed with IPL_TYPE_FCP_DUMP. The
 * target cpu is asked to stop and store its status, which is then copied
 * from SAVE_AREA_BASE (relative to the current prefix) into a freshly
 * allocated save area.
 */
static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
{
	struct save_area *save_area;

	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
		return;
	if (cpu >= NR_CPUS) {
		pr_warning("CPU %i exceeds the maximum %i and is excluded from "
			   "the dump\n", cpu, NR_CPUS - 1);
		return;
	}
	save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
	zfcpdump_save_areas[cpu] = save_area;
	while (raw_sigp(phy_cpu, sigp_stop_and_store_status) == sigp_busy)
		cpu_relax();
	/*
	 * The allocation may have failed; in that case leave the slot NULL
	 * instead of copying through a NULL pointer.
	 */
	if (!save_area)
		return;
	memcpy(save_area,
	       (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE,
	       sizeof(struct save_area));
}

300
/* Save area pointers indexed by cpu slot; filled by smp_get_save_area(). */
struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
EXPORT_SYMBOL_GPL(zfcpdump_save_areas);

#else
304 305 306

/* Without CONFIG_ZFCPDUMP there is no save area to collect. */
static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
{
}

307
#endif /* CONFIG_ZFCPDUMP */
M
Michael Holzheu 已提交
308

309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
/*
 * Returns 1 if cpu address @cpu_id is already mapped to one of the
 * present logical cpus, 0 otherwise.
 */
static int cpu_known(int cpu_id)
{
	int logical;
	int known = 0;

	for_each_present_cpu(logical) {
		if (__cpu_logical_map[logical] != cpu_id)
			continue;
		known = 1;
		break;
	}
	return known;
}

/*
 * Probe all cpu addresses with sigp and assign each unknown address that
 * reports "stopped" to the next free logical cpu in @avail.
 *
 * The logical map and polarization of the current free slot are written
 * before the probe, so a slot may be overwritten several times until a
 * stopped cpu is found for it. Always returns 0.
 */
static int smp_rescan_cpus_sigp(cpumask_t avail)
{
	int addr, slot;

	slot = cpumask_first(&avail);
	for (addr = 0; slot < nr_cpu_ids && addr <= MAX_CPU_ADDRESS; addr++) {
		if (cpu_known(addr))
			continue;
		__cpu_logical_map[slot] = addr;
		smp_cpu_polarization[slot] = POLARIZATION_UNKNWN;
		if (cpu_stopped(slot)) {
			cpu_set(slot, cpu_present_map);
			smp_cpu_state[slot] = CPU_STATE_CONFIGURED;
			/* Slot is taken, move on to the next free one. */
			slot = cpumask_next(slot, &avail);
		}
	}
	return 0;
}

343
/*
 * Use the sclp cpu info to assign every not yet known cpu of the boot
 * cpu's type to the next free logical cpu in @avail and mark it present.
 * Entries at index >= info->configured become standby cpus, the others
 * configured ones.
 *
 * Returns 0 on success (including "no free logical cpu"), -ENOMEM if the
 * info buffer cannot be allocated, or the sclp_get_cpu_info() error.
 */
static int smp_rescan_cpus_sclp(cpumask_t avail)
{
	struct sclp_cpu_info *info;
	int idx, addr, slot;
	int rc;

	slot = cpumask_first(&avail);
	if (slot >= nr_cpu_ids)
		return 0;
	info = kmalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	rc = sclp_get_cpu_info(info);
	for (idx = 0; !rc && slot < nr_cpu_ids && idx < info->combined; idx++) {
		if (info->has_cpu_type && info->cpu[idx].type != smp_cpu_type)
			continue;
		addr = info->cpu[idx].address;
		if (cpu_known(addr))
			continue;
		__cpu_logical_map[slot] = addr;
		smp_cpu_polarization[slot] = POLARIZATION_UNKNWN;
		cpu_set(slot, cpu_present_map);
		smp_cpu_state[slot] = (idx >= info->configured) ?
			CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
		slot = cpumask_next(slot, &avail);
	}
	kfree(info);
	return rc;
}

380
static int __smp_rescan_cpus(void)
381 382 383
{
	cpumask_t avail;

384
	cpus_xor(avail, cpu_possible_map, cpu_present_map);
385 386 387 388
	if (smp_use_sigp_detection)
		return smp_rescan_cpus_sigp(avail);
	else
		return smp_rescan_cpus_sclp(avail);
L
Linus Torvalds 已提交
389 390
}

391 392 393 394 395 396 397 398
/*
 * Boot time cpu detection.
 *
 * Counts configured (stopped) and standby cpus, collects save areas via
 * smp_get_save_area(), records the boot cpu's type and finally runs
 * __smp_rescan_cpus(). Falls back to probing every cpu address with sigp
 * when sclp cpu info is not available.
 */
static void __init smp_detect_cpus(void)
{
	unsigned int cpu;
	unsigned int configured = 1;	/* the boot cpu itself */
	unsigned int standby = 0;
	struct sclp_cpu_info *info;
	u16 boot_cpu_addr, cpu_addr;

	boot_cpu_addr = __cpu_logical_map[0];
	info = kmalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		panic("smp_detect_cpus failed to allocate memory\n");
	/* Use sigp detection algorithm if sclp doesn't work. */
	if (sclp_get_cpu_info(info)) {
		smp_use_sigp_detection = 1;
		for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) {
			if (cpu != boot_cpu_addr && raw_cpu_stopped(cpu)) {
				smp_get_save_area(configured, cpu);
				configured++;
			}
		}
		goto out;
	}

	/* Remember the type of the boot cpu; other types are skipped below. */
	if (info->has_cpu_type) {
		for (cpu = 0; cpu < info->combined; cpu++) {
			if (info->cpu[cpu].address != boot_cpu_addr)
				continue;
			smp_cpu_type = info->cpu[cpu].type;
			break;
		}
	}

	for (cpu = 0; cpu < info->combined; cpu++) {
		if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
			continue;
		cpu_addr = info->cpu[cpu].address;
		if (cpu_addr == boot_cpu_addr)
			continue;
		if (raw_cpu_stopped(cpu_addr)) {
			smp_get_save_area(configured, cpu_addr);
			configured++;
		} else {
			standby++;
		}
	}
out:
	kfree(info);
	pr_info("%d configured CPUs, %d standby CPUs\n", configured, standby);
	get_online_cpus();
	__smp_rescan_cpus();
	put_online_cpus();
}

L
Linus Torvalds 已提交
447
/*
448
 *	Activate a secondary processor.
L
Linus Torvalds 已提交
449
 */
H
Heiko Carstens 已提交
450
int __cpuinit start_secondary(void *cpuvoid)
L
Linus Torvalds 已提交
451
{
452 453
	/* Setup the cpu */
	cpu_init();
454
	preempt_disable();
M
Martin Schwidefsky 已提交
455
	/* Enable TOD clock interrupts on the secondary cpu. */
456
	init_cpu_timer();
M
Martin Schwidefsky 已提交
457
	/* Enable cpu timer interrupts on the secondary cpu. */
458
	init_cpu_vtimer();
L
Linus Torvalds 已提交
459
	/* Enable pfault pseudo page faults on this cpu. */
H
Heiko Carstens 已提交
460 461
	pfault_init();

462 463
	/* call cpu notifiers */
	notify_cpu_starting(smp_processor_id());
L
Linus Torvalds 已提交
464
	/* Mark this cpu as online */
465
	ipi_call_lock();
L
Linus Torvalds 已提交
466
	cpu_set(smp_processor_id(), cpu_online_map);
467
	ipi_call_unlock();
L
Linus Torvalds 已提交
468 469
	/* Switch on interrupts */
	local_irq_enable();
470
	/* Print info about this processor */
471
	print_cpu_info();
472 473 474
	/* cpu_idle will call schedule for us */
	cpu_idle();
	return 0;
L
Linus Torvalds 已提交
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
}

/*
 * Fork the idle task for logical cpu @cpu and remember it in
 * current_set[]. Panics if the fork fails.
 */
static void __init smp_create_idle(unsigned int cpu)
{
	/*
	 *  don't care about the psw and regs settings since we'll never
	 *  reschedule the forked task.
	 */
	struct task_struct *idle = fork_idle(cpu);

	if (IS_ERR(idle))
		panic("failed fork for CPU %u: %li", cpu, PTR_ERR(idle));
	current_set[cpu] = idle;
}

491 492 493 494 495
/*
 * Allocate and pre-initialise the lowcore of logical cpu @cpu together
 * with its async and panic stacks (plus the extended save area on 31 bit
 * IEEE machines, or the per-cpu vdso data on 64 bit).
 *
 * The first 512 bytes are copied from the current lowcore, the rest is
 * zeroed. On success the lowcore is published in lowcore_ptr[@cpu].
 *
 * Returns 0 on success, -ENOMEM if any allocation fails.
 */
static int __cpuinit smp_alloc_lowcore(int cpu)
{
	unsigned long async_stack, panic_stack;
	struct _lowcore *lc;

	lc = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
	if (!lc)
		return -ENOMEM;
	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
	panic_stack = __get_free_page(GFP_KERNEL);
	if (!(panic_stack && async_stack))
		goto out_free;
	memcpy(lc, &S390_lowcore, 512);
	memset((char *)lc + 512, 0, sizeof(*lc) - 512);
	/* Stack pointers refer to the end of the respective area. */
	lc->async_stack = async_stack + ASYNC_SIZE;
	lc->panic_stack = panic_stack + PAGE_SIZE;

#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE) {
		unsigned long ext_save_area;

		ext_save_area = get_zeroed_page(GFP_KERNEL);
		if (!ext_save_area)
			goto out_free;
		lc->extended_save_area_addr = (u32) ext_save_area;
	}
#else
	if (vdso_alloc_per_cpu(cpu, lc))
		goto out_free;
#endif
	lowcore_ptr[cpu] = lc;
	return 0;

out_free:
	free_page(panic_stack);
	free_pages(async_stack, ASYNC_ORDER);
	free_pages((unsigned long) lc, LC_ORDER);
	return -ENOMEM;
}

static void smp_free_lowcore(int cpu)
{
	struct _lowcore *lowcore;

	lowcore = lowcore_ptr[cpu];
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE)
		free_page((unsigned long) lowcore->extended_save_area_addr);
539 540
#else
	vdso_free_per_cpu(cpu, lowcore);
541 542 543
#endif
	free_page(lowcore->panic_stack - PAGE_SIZE);
	free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
544
	free_pages((unsigned long) lowcore, LC_ORDER);
545 546 547
	lowcore_ptr[cpu] = NULL;
}

L
Linus Torvalds 已提交
548
/* Upping and downing of CPUs */
549
int __cpuinit __cpu_up(unsigned int cpu)
L
Linus Torvalds 已提交
550
{
551
	struct _lowcore *cpu_lowcore;
H
Heiko Carstens 已提交
552
	struct task_struct *idle;
L
Linus Torvalds 已提交
553
	struct stack_frame *sf;
554
	u32 lowcore;
H
Heiko Carstens 已提交
555
	int ccode;
L
Linus Torvalds 已提交
556

557 558
	if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
		return -EIO;
559 560
	if (smp_alloc_lowcore(cpu))
		return -ENOMEM;
561
	do {
H
Heiko Carstens 已提交
562
		ccode = sigp(cpu, sigp_initial_cpu_reset);
563 564 565 566 567 568 569
		if (ccode == sigp_busy)
			udelay(10);
		if (ccode == sigp_not_operational)
			goto err_out;
	} while (ccode == sigp_busy);

	lowcore = (u32)(unsigned long)lowcore_ptr[cpu];
H
Heiko Carstens 已提交
570
	while (sigp_p(lowcore, cpu, sigp_set_prefix) == sigp_busy)
571
		udelay(10);
L
Linus Torvalds 已提交
572 573

	idle = current_set[cpu];
574
	cpu_lowcore = lowcore_ptr[cpu];
L
Linus Torvalds 已提交
575
	cpu_lowcore->kernel_stack = (unsigned long)
576
		task_stack_page(idle) + THREAD_SIZE;
577
	cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle);
L
Linus Torvalds 已提交
578 579 580 581 582 583
	sf = (struct stack_frame *) (cpu_lowcore->kernel_stack
				     - sizeof(struct pt_regs)
				     - sizeof(struct stack_frame));
	memset(sf, 0, sizeof(struct stack_frame));
	sf->gprs[9] = (unsigned long) sf;
	cpu_lowcore->save_area[15] = (unsigned long) sf;
584
	__ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
585 586 587
	asm volatile(
		"	stam	0,15,0(%0)"
		: : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
L
Linus Torvalds 已提交
588
	cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
589
	cpu_lowcore->current_task = (unsigned long) idle;
590
	cpu_lowcore->cpu_nr = cpu;
591
	cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce;
592
	cpu_lowcore->machine_flags = S390_lowcore.machine_flags;
593
	cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func;
L
Linus Torvalds 已提交
594
	eieio();
M
Michael Ryan 已提交
595

H
Heiko Carstens 已提交
596
	while (sigp(cpu, sigp_restart) == sigp_busy)
M
Michael Ryan 已提交
597
		udelay(10);
L
Linus Torvalds 已提交
598 599 600 601

	while (!cpu_online(cpu))
		cpu_relax();
	return 0;
602 603 604 605

err_out:
	smp_free_lowcore(cpu);
	return -EIO;
L
Linus Torvalds 已提交
606 607
}

608
/*
 * "possible_cpus=" early parameter: make cpu 0 plus cpus 1 .. n-1 possible,
 * limited by nr_cpu_ids.
 */
static int __init setup_possible_cpus(char *s)
{
	int limit, cpu;

	limit = simple_strtoul(s, NULL, 0);
	init_cpu_possible(cpumask_of(0));
	cpu = 1;
	while (cpu < limit && cpu < nr_cpu_ids) {
		set_cpu_possible(cpu, true);
		cpu++;
	}
	return 0;
}
early_param("possible_cpus", setup_possible_cpus);

620 621
#ifdef CONFIG_HOTPLUG_CPU

622
int __cpu_disable(void)
L
Linus Torvalds 已提交
623
{
624
	struct ec_creg_mask_parms cr_parms;
Z
Zwane Mwaikambo 已提交
625
	int cpu = smp_processor_id();
L
Linus Torvalds 已提交
626

Z
Zwane Mwaikambo 已提交
627
	cpu_clear(cpu, cpu_online_map);
L
Linus Torvalds 已提交
628 629

	/* Disable pfault pseudo page faults on this cpu. */
H
Heiko Carstens 已提交
630
	pfault_fini();
L
Linus Torvalds 已提交
631

632 633
	memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
	memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
L
Linus Torvalds 已提交
634

635
	/* disable all external interrupts */
L
Linus Torvalds 已提交
636
	cr_parms.orvals[0] = 0;
637 638
	cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 |
				1 << 11 | 1 << 10 | 1 <<  6 | 1 <<  4);
L
Linus Torvalds 已提交
639 640
	/* disable all I/O interrupts */
	cr_parms.orvals[6] = 0;
641 642
	cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 |
				1 << 27 | 1 << 26 | 1 << 25 | 1 << 24);
L
Linus Torvalds 已提交
643 644
	/* disable most machine checks */
	cr_parms.orvals[14] = 0;
645 646
	cr_parms.andvals[14] = ~(1 << 28 | 1 << 27 | 1 << 26 |
				 1 << 25 | 1 << 24);
647

L
Linus Torvalds 已提交
648 649 650 651 652
	smp_ctl_bit_callback(&cr_parms);

	return 0;
}

653
void __cpu_die(unsigned int cpu)
L
Linus Torvalds 已提交
654 655
{
	/* Wait until target cpu is down */
656
	while (!cpu_stopped(cpu))
L
Linus Torvalds 已提交
657
		cpu_relax();
H
Heiko Carstens 已提交
658
	while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy)
659
		udelay(10);
660
	smp_free_lowcore(cpu);
661
	pr_info("Processor %d stopped\n", cpu);
L
Linus Torvalds 已提交
662 663
}

664
void cpu_die(void)
L
Linus Torvalds 已提交
665 666
{
	idle_task_exit();
H
Heiko Carstens 已提交
667
	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
668
		cpu_relax();
669
	for (;;);
L
Linus Torvalds 已提交
670 671
}

672 673
#endif /* CONFIG_HOTPLUG_CPU */

L
Linus Torvalds 已提交
674 675
/*
 * Boot time SMP preparation: detect the cpus, register the emergency
 * signal handler, move the boot cpu to a newly allocated lowcore and
 * create the idle tasks of all other possible cpus.
 *
 * @max_cpus is not used.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
#ifndef CONFIG_64BIT
	unsigned long save_area = 0;
#endif
	unsigned long async_stack, panic_stack;
	struct _lowcore *lc;
	unsigned int cpu;

	smp_detect_cpus();

	/* request the 0x1201 emergency signal external interrupt */
	if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
		panic("Couldn't request external interrupt 0x1201");
	print_cpu_info();

	/* Reallocate current lowcore, but keep its contents. */
	lc = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
	panic_stack = __get_free_page(GFP_KERNEL);
	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
	BUG_ON(!lc || !panic_stack || !async_stack);
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE)
		save_area = get_zeroed_page(GFP_KERNEL);
#endif
	/* No interrupts or machine checks while the prefix is switched. */
	local_irq_disable();
	local_mcck_disable();
	lowcore_ptr[smp_processor_id()] = lc;
	*lc = S390_lowcore;
	lc->panic_stack = panic_stack + PAGE_SIZE;
	lc->async_stack = async_stack + ASYNC_SIZE;
#ifndef CONFIG_64BIT
	if (MACHINE_HAS_IEEE)
		lc->extended_save_area_addr = (u32) save_area;
#endif
	set_prefix((u32)(unsigned long) lc);
	local_mcck_enable();
	local_irq_enable();
#ifdef CONFIG_64BIT
	if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore))
		BUG();
#endif
	/* Every other possible cpu gets its idle task now. */
	for_each_possible_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		smp_create_idle(cpu);
	}
}

H
Heiko Carstens 已提交
721
/*
 * Register the boot cpu as logical cpu 0: present, online, configured,
 * with unknown polarization and the current task as its idle task.
 */
void __init smp_prepare_boot_cpu(void)
{
	/* This must run on cpu 0. */
	BUG_ON(smp_processor_id() != 0);

	current_thread_info()->cpu = 0;
	cpu_set(0, cpu_present_map);
	cpu_set(0, cpu_online_map);
	S390_lowcore.percpu_offset = __per_cpu_offset[0];
	current_set[0] = current;
	smp_cpu_state[0] = CPU_STATE_CONFIGURED;
	smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
}

H
Heiko Carstens 已提交
734
/* Nothing left to do here; @max_cpus is ignored. */
void __init smp_cpus_done(unsigned int max_cpus)
{
}

738 739 740 741 742 743
/*
 * Make the running cpu logical cpu 0 and record its cpu address, as
 * returned by stap(), in the logical map.
 */
void __init smp_setup_processor_id(void)
{
	__u16 boot_addr;

	S390_lowcore.cpu_nr = 0;
	boot_addr = stap();
	__cpu_logical_map[0] = boot_addr;
}

L
Linus Torvalds 已提交
744 745 746 747 748 749 750 751
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 *
 * This implementation ignores @multiplier and always returns 0.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

755
#ifdef CONFIG_HOTPLUG_CPU
756 757
static ssize_t cpu_configure_show(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
758 759 760 761 762 763 764 765 766
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

767 768 769
/*
 * sysfs "configure" write: "0" deconfigures a configured cpu, "1"
 * configures a standby cpu; requests that match the current state are
 * accepted without doing anything.
 *
 * Returns @count on success, -EINVAL for anything but a single 0 or 1,
 * -EBUSY for online cpus and cpu 0, or the sclp error code.
 */
static ssize_t cpu_configure_store(struct sys_device *dev,
				  struct sysdev_attribute *attr,
				  const char *buf, size_t count)
{
	int cpu = dev->id;
	int val;
	int rc = 0;
	char delim;

	/* Exactly one integer, optionally followed by whitespace. */
	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;

	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_online(cpu) || cpu == 0) {
		/* disallow configuration changes of online cpus and cpu 0 */
		rc = -EBUSY;
	} else if (val == 0 && smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
		rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
		if (!rc) {
			smp_cpu_state[cpu] = CPU_STATE_STANDBY;
			smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
		}
	} else if (val == 1 && smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
		rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
		if (!rc) {
			smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
			smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
		}
	}
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	if (rc)
		return rc;
	return count;
}
static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */

817 818
static ssize_t cpu_polarization_show(struct sys_device *dev,
				     struct sysdev_attribute *attr, char *buf)
H
Heiko Carstens 已提交
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_polarization[cpu]) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);

846 847
static ssize_t show_cpu_address(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
848 849 850 851 852 853 854 855 856 857 858
{
	return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
}
static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL);


static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
	&attr_configure.attr,
#endif
	&attr_address.attr,
H
Heiko Carstens 已提交
859
	&attr_polarization.attr,
860 861 862 863 864 865
	NULL,
};

static struct attribute_group cpu_common_attr_group = {
	.attrs = cpu_common_attrs,
};
L
Linus Torvalds 已提交
866

867 868
/*
 * sysfs "capability" read: print the value delivered by
 * get_cpu_capability(), or pass on its error code.
 */
static ssize_t show_capability(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
{
	unsigned int capability;
	int rc;

	rc = get_cpu_capability(&capability);
	if (!rc)
		rc = sprintf(buf, "%u\n", capability);
	return rc;
}
static SYSDEV_ATTR(capability, 0444, show_capability, NULL);

880 881
static ssize_t show_idle_count(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
882 883 884
{
	struct s390_idle_data *idle;
	unsigned long long idle_count;
885
	unsigned int sequence;
886 887

	idle = &per_cpu(s390_idle, dev->id);
888 889 890 891 892
repeat:
	sequence = idle->sequence;
	smp_rmb();
	if (sequence & 1)
		goto repeat;
893
	idle_count = idle->idle_count;
894 895
	if (idle->idle_enter)
		idle_count++;
896 897 898
	smp_rmb();
	if (idle->sequence != sequence)
		goto repeat;
899 900 901 902
	return sprintf(buf, "%llu\n", idle_count);
}
static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);

903 904
static ssize_t show_idle_time(struct sys_device *dev,
				struct sysdev_attribute *attr, char *buf)
905 906
{
	struct s390_idle_data *idle;
907
	unsigned long long now, idle_time, idle_enter;
908
	unsigned int sequence;
909 910

	idle = &per_cpu(s390_idle, dev->id);
911
	now = get_clock();
912 913 914 915 916
repeat:
	sequence = idle->sequence;
	smp_rmb();
	if (sequence & 1)
		goto repeat;
917 918 919 920
	idle_time = idle->idle_time;
	idle_enter = idle->idle_enter;
	if (idle_enter != 0ULL && idle_enter < now)
		idle_time += now - idle_enter;
921 922 923
	smp_rmb();
	if (idle->sequence != sequence)
		goto repeat;
924
	return sprintf(buf, "%llu\n", idle_time >> 12);
925
}
926
static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
927

928
static struct attribute *cpu_online_attrs[] = {
929 930
	&attr_capability.attr,
	&attr_idle_count.attr,
931
	&attr_idle_time_us.attr,
932 933 934
	NULL,
};

935 936
static struct attribute_group cpu_online_attr_group = {
	.attrs = cpu_online_attrs,
937 938
};

939 940 941 942 943 944
/*
 * Cpu hotplug notifier: when a cpu comes online its idle statistics are
 * reset and the online attribute group is created; when it is dead the
 * group is removed again.
 *
 * Returns NOTIFY_BAD if the group cannot be created, NOTIFY_OK otherwise.
 */
static int __cpuinit smp_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned int)(long)hcpu;
	struct cpu *c = &per_cpu(cpu_devices, cpu);
	struct sys_device *s = &c->sysdev;
	struct s390_idle_data *idle;
	int result = NOTIFY_OK;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		idle = &per_cpu(s390_idle, cpu);
		memset(idle, 0, sizeof(struct s390_idle_data));
		if (sysfs_create_group(&s->kobj, &cpu_online_attr_group))
			result = NOTIFY_BAD;
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
		break;
	}
	return result;
}

/* Hotplug notifier block, registered in topology_init(). */
static struct notifier_block __cpuinitdata smp_cpu_nb = {
	.notifier_call = smp_cpu_notify,
};

967
/*
 * Register the sysfs device of present cpu @cpu and create its attribute
 * groups; the online group is only created if the cpu is online.
 *
 * Returns 0 on success. On failure everything created so far is undone
 * (the cpu itself is only unregistered with CONFIG_HOTPLUG_CPU) and the
 * error code is returned.
 */
static int __devinit smp_add_present_cpu(int cpu)
{
	struct cpu *c = &per_cpu(cpu_devices, cpu);
	struct sys_device *s = &c->sysdev;
	int rc;

	c->hotpluggable = 1;
	rc = register_cpu(c, cpu);
	if (rc)
		return rc;
	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
	if (rc)
		goto out_cpu;
	if (!cpu_online(cpu))
		return 0;
	rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
	if (rc) {
		sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
		goto out_cpu;
	}
	return 0;

out_cpu:
#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu(c);
#endif
	return rc;
}

#ifdef CONFIG_HOTPLUG_CPU
995

996
/*
 * Rescan for cpus that became available and register the new ones.
 *
 * Cpus whose sysfs registration fails are removed from the present map
 * again; such failures are not reported to the caller. A topology update
 * is only scheduled if the scan itself succeeded and found new cpus:
 * after a failed scan the local mask still holds the complete present
 * map and must not be mistaken for a set of new cpus.
 *
 * Returns 0 on success or the error code of __smp_rescan_cpus().
 */
int __ref smp_rescan_cpus(void)
{
	cpumask_t newcpus;
	int update_topology = 0;
	int cpu;
	int rc;

	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	/* Snapshot of the present map before the scan. */
	newcpus = cpu_present_map;
	rc = __smp_rescan_cpus();
	if (rc)
		goto out;
	/* Present now, but not before: these are the new cpus. */
	cpus_andnot(newcpus, cpu_present_map, newcpus);
	for_each_cpu_mask(cpu, newcpus) {
		rc = smp_add_present_cpu(cpu);
		if (rc)
			cpu_clear(cpu, cpu_present_map);
	}
	update_topology = !cpus_empty(newcpus);
	rc = 0;
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	if (update_topology)
		topology_schedule_update();
	return rc;
}

1023
static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf,
1024 1025 1026 1027 1028
				  size_t count)
{
	int rc;

	rc = smp_rescan_cpus();
1029 1030
	return rc ? rc : count;
}
1031
static SYSDEV_CLASS_ATTR(rescan, 0200, NULL, rescan_store);
1032 1033
#endif /* CONFIG_HOTPLUG_CPU */

1034
static ssize_t dispatching_show(struct sysdev_class *class, char *buf)
H
Heiko Carstens 已提交
1035 1036 1037 1038 1039 1040 1041 1042 1043
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

1044 1045
static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf,
				 size_t count)
H
Heiko Carstens 已提交
1046 1047 1048 1049 1050 1051 1052 1053 1054 1055
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	get_online_cpus();
H
Heiko Carstens 已提交
1056
	mutex_lock(&smp_cpu_state_mutex);
H
Heiko Carstens 已提交
1057 1058 1059 1060 1061 1062 1063
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (!rc)
		cpu_management = val;
out:
	mutex_unlock(&smp_cpu_state_mutex);
H
Heiko Carstens 已提交
1064
	put_online_cpus();
H
Heiko Carstens 已提交
1065 1066
	return rc ? rc : count;
}
1067 1068
static SYSDEV_CLASS_ATTR(dispatching, 0644, dispatching_show,
			 dispatching_store);
H
Heiko Carstens 已提交
1069

L
Linus Torvalds 已提交
1070 1071 1072
/*
 * Initcall: register the hotplug notifier, create the class level
 * attributes ("rescan" only with CONFIG_HOTPLUG_CPU, "dispatching") and
 * the sysfs entries of all present cpus.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __init topology_init(void)
{
	int cpu;
	int rc;

	register_cpu_notifier(&smp_cpu_nb);

#ifdef CONFIG_HOTPLUG_CPU
	rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_rescan);
	if (rc)
		return rc;
#endif
	rc = sysdev_class_create_file(&cpu_sysdev_class, &attr_dispatching);
	if (rc)
		return rc;
	/* rc is 0 here and stays 0 unless adding a cpu fails. */
	for_each_present_cpu(cpu) {
		rc = smp_add_present_cpu(cpu);
		if (rc)
			break;
	}
	return rc;
}
subsys_initcall(topology_init);