/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>

#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/kexec.h>
#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

struct thread_info *secondary_ti;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

/* Can't be static due to PowerMac hackery */
volatile unsigned int cpu_callin_map[NR_CPUS];

int smt_enabled_at_boot = 1;

/*
 * Returns 1 if the specified cpu should be brought up during boot.
 * Used to inhibit booting threads if they've been disabled or
 * limited on the command line
 */
int smp_generic_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it.
	 */
	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
			return 0;
		if (smt_enabled_at_boot
		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
			return 0;
	}

	return 1;
}


#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
	if (nr < 0 || nr >= nr_cpu_ids)
		return -EINVAL;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start.
	 */
	if (!paca[nr].cpu_start) {
		paca[nr].cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
	generic_set_cpu_up(nr);
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}
#endif /* CONFIG_PPC64 */

static irqreturn_t call_function_action(int irq, void *data)
{
	generic_smp_call_function_interrupt();
	return IRQ_HANDLED;
}

static irqreturn_t reschedule_action(int irq, void *data)
{
	scheduler_ipi();
	return IRQ_HANDLED;
}

static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
	tick_broadcast_ipi_handler();
	return IRQ_HANDLED;
}

#ifdef CONFIG_NMI_IPI
static irqreturn_t nmi_ipi_action(int irq, void *data)
{
	smp_handle_nmi_ipi(get_irq_regs());
	return IRQ_HANDLED;
}
#endif

static irq_handler_t smp_ipi_action[] = {
	[PPC_MSG_CALL_FUNCTION] =  call_function_action,
	[PPC_MSG_RESCHEDULE] = reschedule_action,
	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
#endif
};

/*
 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
 * than going through the call function infrastructure, and strongly
 * serialized, so it is more appropriate for debugging.
 */
const char *smp_ipi_name[] = {
	[PPC_MSG_CALL_FUNCTION] =  "ipi call function",
	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
	[PPC_MSG_NMI_IPI] = "nmi ipi",
};

/* optional function to request ipi, for controllers with >= 4 ipis */
int smp_request_message_ipi(int virq, int msg)
{
	int err;

	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
		return -EINVAL;
#ifndef CONFIG_NMI_IPI
	if (msg == PPC_MSG_NMI_IPI)
		return 1;
#endif

	err = request_irq(virq, smp_ipi_action[msg],
			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
			  smp_ipi_name[msg], NULL);
	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
		virq, smp_ipi_name[msg], err);

	return err;
}

#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
	long messages;			/* current messages */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);

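/*
 * Each IPI message type owns one byte of the per-cpu 'messages' word:
 * senders set their byte with a plain store (after a full barrier), and
 * smp_ipi_demux() collects and clears all pending types with one xchg().
 */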
void smp_muxed_ipi_set_message(int cpu, int msg)
{
	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
	char *message = (char *)&info->messages;

	/*
	 * Order previous accesses before accesses in the IPI handler.
	 */
	smp_mb();
	message[msg] = 1;
}

void smp_muxed_ipi_message_pass(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);

	/*
	 * cause_ipi functions are required to include a full barrier
	 * before doing whatever causes the IPI.
	 */
	smp_ops->cause_ipi(cpu);
}

#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
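/*
 * IPI_MESSAGE(A) selects the byte of 'messages' that corresponds to message
 * type A; the shift differs between big and little endian so that it lines
 * up with the byte store done in smp_muxed_ipi_set_message().
 */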

irqreturn_t smp_ipi_demux(void)
{
	mb();	/* order any irq clear */

	return smp_ipi_demux_relaxed();
}

/* sync-free variant. Callers should ensure synchronization */
irqreturn_t smp_ipi_demux_relaxed(void)
{
	struct cpu_messages *info;
	unsigned long all;

	info = this_cpu_ptr(&ipi_message);
	do {
		all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
		/*
		 * Must check for PPC_MSG_RM_HOST_ACTION messages
		 * before PPC_MSG_CALL_FUNCTION messages because when
		 * a VM is destroyed, we call kick_all_cpus_sync()
		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
		 * messages have completed before we free any VCPUs.
		 */
		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
			kvmppc_xics_ipi_action();
#endif
		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
			generic_smp_call_function_interrupt();
		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
			scheduler_ipi();
		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
			tick_broadcast_ipi_handler();
#ifdef CONFIG_NMI_IPI
		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
			nmi_ipi_action(0, NULL);
#endif
	} while (info->messages);

	return IRQ_HANDLED;
}
#endif /* CONFIG_PPC_SMP_MUXED_IPI */

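/*
 * Deliver a single message to a single CPU, either via the platform's
 * message_pass() hook or, if none is provided, via the muxed IPI path.
 */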
static inline void do_message_pass(int cpu, int msg)
{
	if (smp_ops->message_pass)
		smp_ops->message_pass(cpu, msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	else
		smp_muxed_ipi_message_pass(cpu, msg);
#endif
}

void smp_send_reschedule(int cpu)
{
	if (likely(smp_ops))
		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(smp_send_reschedule);

void arch_send_call_function_single_ipi(int cpu)
{
	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

#ifdef CONFIG_NMI_IPI

/*
 * "NMI IPI" system.
 *
 * NMI IPIs may not be recoverable, so should not be used as ongoing part of
 * a running system. They can be used for crash, debug, halt/reboot, etc.
 *
 * NMI IPIs are globally single threaded. No more than one in progress at
 * any time.
 *
 * The IPI call waits with interrupts disabled until all targets enter the
 * NMI handler, then the call returns.
 *
 * No new NMI can be initiated until targets exit the handler.
 *
 * The IPI call may time out without all targets entering the NMI handler.
 * In that case, there is some logic to recover (and ignore subsequent
 * NMI interrupts that may eventually be raised), but the platform interrupt
 * handler may not be able to distinguish this from other exception causes,
 * which may cause a crash.
 */

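/*
 * __nmi_ipi_lock serializes the NMI IPI state below. nmi_ipi_pending_mask
 * holds the targets that have not yet entered the handler, and
 * nmi_ipi_busy_count is non-zero while a call (and its handlers) is in
 * flight.
 */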
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
static int nmi_ipi_busy_count = 0;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;

static void nmi_ipi_lock_start(unsigned long *flags)
{
	raw_local_irq_save(*flags);
	hard_irq_disable();
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
		raw_local_irq_restore(*flags);
		cpu_relax();
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static void nmi_ipi_lock(void)
{
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
		cpu_relax();
}

static void nmi_ipi_unlock(void)
{
	smp_mb();
	WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
	atomic_set(&__nmi_ipi_lock, 0);
}

static void nmi_ipi_unlock_end(unsigned long *flags)
{
	nmi_ipi_unlock();
	raw_local_irq_restore(*flags);
}

/*
 * Platform NMI handler calls this to ack
 */
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	void (*fn)(struct pt_regs *);
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 0;

	/*
	 * Unexpected NMIs are possible here because the interrupt may not
	 * be able to distinguish NMI IPIs from other types of NMIs, or
	 * because the caller may have timed out.
	 */
	nmi_ipi_lock_start(&flags);
	if (!nmi_ipi_busy_count)
		goto out;
	if (!cpumask_test_cpu(me, &nmi_ipi_pending_mask))
		goto out;

	fn = nmi_ipi_function;
	if (!fn)
		goto out;

	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	ret = 1;

	fn(regs);

	nmi_ipi_lock();
	nmi_ipi_busy_count--;
out:
	nmi_ipi_unlock_end(&flags);

	return ret;
}

static void do_smp_send_nmi_ipi(int cpu)
{
	if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
		return;

	if (cpu >= 0) {
		do_message_pass(cpu, PPC_MSG_NMI_IPI);
	} else {
		int c;

		for_each_online_cpu(c) {
			if (c == raw_smp_processor_id())
				continue;
			do_message_pass(c, PPC_MSG_NMI_IPI);
		}
	}
}

void smp_flush_nmi_ipi(u64 delay_us)
{
	unsigned long flags;

	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy_count) {
		nmi_ipi_unlock_end(&flags);
		udelay(1);
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				return;
		}
		nmi_ipi_lock_start(&flags);
	}
	nmi_ipi_unlock_end(&flags);
}

/*
 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
 * - fn is the target callback function.
 * - delay_us > 0 is the delay before giving up waiting for targets to
 *   enter the handler, == 0 specifies indefinite delay.
 */
int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 1;

	BUG_ON(cpu == me);
	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);

	if (unlikely(!smp_ops))
		return 0;

	/* Take the nmi_ipi_busy count/lock with interrupts hard disabled */
	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy_count) {
		nmi_ipi_unlock_end(&flags);
		cpu_relax();
		nmi_ipi_lock_start(&flags);
	}

	nmi_ipi_function = fn;

	if (cpu < 0) {
		/* ALL_OTHERS */
		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	} else {
		/* cpumask starts clear */
		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count++;
	nmi_ipi_unlock();

	do_smp_send_nmi_ipi(cpu);

	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
		udelay(1);
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				break;
		}
	}

	nmi_ipi_lock();
	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
		/* Could not gather all CPUs */
		ret = 0;
		cpumask_clear(&nmi_ipi_pending_mask);
	}
	nmi_ipi_busy_count--;
	nmi_ipi_unlock_end(&flags);

	return ret;
}
#endif /* CONFIG_NMI_IPI */

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
}
#endif

#ifdef CONFIG_DEBUGGER
void debugger_ipi_callback(struct pt_regs *regs)
{
	debugger_ipi(regs);
}

void smp_send_debugger_break(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif

#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
}
#endif

static void stop_this_cpu(void *dummy)
{
	/* Remove this CPU */
	set_cpu_online(smp_processor_id(), false);

	local_irq_disable();
	while (1)
		;
}

void smp_send_stop(void)
{
	smp_call_function(stop_this_cpu, NULL, 0);
}

struct thread_info *current_set[NR_CPUS];

static void smp_store_cpu_info(int id)
{
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
#ifdef CONFIG_PPC_FSL_BOOK3E
	per_cpu(next_tlbcam_idx, id)
		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/* 
	 * setup_cpu may need to be called on the boot cpu. We haven't
	 * spun any cpus up yet, but let's be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		/*
		 * numa_node_id() works after this.
		 */
		if (cpu_present(cpu)) {
			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
			set_cpu_numa_mem(cpu,
				local_memory_node(numa_cpu_lookup_table[cpu]));
		}
	}

	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
}

void smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
	paca[boot_cpuid].__current = current;
#endif
	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
	current_set[boot_cpuid] = task_thread_info(current);
}

#ifdef CONFIG_HOTPLUG_CPU

int generic_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();

	if (cpu == boot_cpuid)
		return -EBUSY;

	set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
	vdso_data->processorCount--;
#endif
	/* Update affinity of all IRQs previously aimed at this CPU */
	irq_migrate_all_off_this_cpu();

	/*
	 * Depending on the details of the interrupt controller, it's possible
	 * that one of the interrupts we just migrated away from this CPU is
	 * actually already pending on this CPU. If we leave it in that state
	 * the interrupt will never be EOI'ed, and will never fire again. So
	 * temporarily enable interrupts here, to allow any pending interrupt to
	 * be received (and EOI'ed), before we take this CPU offline.
	 */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	return 0;
}

void generic_cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (is_cpu_dead(cpu))
			return;
		msleep(100);
	}
	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}

void generic_set_cpu_dead(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_DEAD;
}

/*
 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
 * which makes the delay in generic_cpu_die() not happen.
 */
void generic_set_cpu_up(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
}

int generic_check_cpu_restart(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}

int is_cpu_dead(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_DEAD;
}

static bool secondaries_inhibited(void)
{
	return kvm_hv_mode_active();
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited()		0

#endif

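/*
 * Point the new CPU's paca (on ppc64) and secondary_ti at its idle task so
 * the secondary startup path can find its initial stack and thread_info.
 */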
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
	struct thread_info *ti = task_thread_info(idle);

#ifdef CONFIG_PPC64
	paca[cpu].__current = idle;
	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	ti->cpu = cpu;
	secondary_ti = current_set[cpu] = ti;
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int rc, c;

	/*
	 * Don't allow secondary threads to come online if inhibited
	 */
	if (threads_per_core > 1 && secondaries_inhibited() &&
	    cpu_thread_in_subcore(cpu))
		return -EBUSY;

	if (smp_ops == NULL ||
	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
		return -EINVAL;

	cpu_idle_thread_init(cpu, tidle);

	/*
	 * The platform might need to allocate resources prior to bringing
	 * up the CPU
	 */
	if (smp_ops->prepare_cpu) {
		rc = smp_ops->prepare_cpu(cpu);
		if (rc)
			return rc;
	}

	/* Make sure the callin-map entry is 0 (it can be left over from
	 * a previous CPU hotplug).
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	smp_mb();

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}

	/*
	 * Wait to see if the cpu made a callin (is actually up).
	 * Use this value that I found through experimentation.
	 * -- Cort
	 */
	if (system_state < SYSTEM_RUNNING)
		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case.  Wait five seconds.
		 */
		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
			msleep(1);
#endif

	if (!cpu_callin_map[cpu]) {
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
		return -ENOENT;
	}

	DBG("Processor %u found.\n", cpu);

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

	/* Wait until cpu puts itself in the online & active maps */
	spin_until_cond(cpu_online(cpu));

	return 0;
}

/* Return the value of the reg property corresponding to the given
 * logical cpu.
 */
int cpu_to_core_id(int cpu)
{
	struct device_node *np;
	const __be32 *reg;
	int id = -1;

	np = of_get_cpu_node(cpu, NULL);
	if (!np)
		goto out;

	reg = of_get_property(np, "reg", NULL);
	if (!reg)
		goto out;

	id = be32_to_cpup(reg);
out:
	of_node_put(np);
	return id;
}
EXPORT_SYMBOL_GPL(cpu_to_core_id);

/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
	return cpu >> threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);

int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);

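/*
 * Add or remove 'cpu' from the core mask of every CPU whose device-tree
 * node carries the same ibm,chip-id value.
 */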
static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
{
	const struct cpumask *mask;
	struct device_node *np;
	int i, plen;
	const __be32 *prop;

	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
		np = of_get_cpu_node(i, NULL);
		if (!np)
			continue;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int) &&
		    of_read_number(prop, 1) == chipid) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
}

/* Must be called when no change can occur to cpu_present_mask,
 * i.e. during cpu online or offline.
 */
static struct device_node *cpu_to_l2cache(int cpu)
{
	struct device_node *np;
	struct device_node *cache;

	if (!cpu_present(cpu))
		return NULL;

	np = of_get_cpu_node(cpu, NULL);
	if (np == NULL)
		return NULL;

	cache = of_find_next_cache_node(np);

	of_node_put(np);

	return cache;
}

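/*
 * Update cpu_core_mask for 'cpu': group by ibm,chip-id when the device tree
 * provides it, otherwise fall back to grouping CPUs that share an L2 cache.
 */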
static void traverse_core_siblings(int cpu, bool add)
{
	struct device_node *l2_cache, *np;
	const struct cpumask *mask;
	int i, chip, plen;
	const __be32 *prop;

	/* First see if we have ibm,chip-id properties in cpu nodes */
	np = of_get_cpu_node(cpu, NULL);
	if (np) {
		chip = -1;
		prop = of_get_property(np, "ibm,chip-id", &plen);
		if (prop && plen == sizeof(int))
			chip = of_read_number(prop, 1);
		of_node_put(np);
		if (chip >= 0) {
			traverse_siblings_chip_id(cpu, add, chip);
			return;
		}
	}

	l2_cache = cpu_to_l2cache(cpu);
	mask = add ? cpu_online_mask : cpu_present_mask;
	for_each_cpu(i, mask) {
		np = cpu_to_l2cache(i);
		if (!np)
			continue;
		if (np == l2_cache) {
			if (add) {
				cpumask_set_cpu(cpu, cpu_core_mask(i));
				cpumask_set_cpu(i, cpu_core_mask(cpu));
			} else {
				cpumask_clear_cpu(cpu, cpu_core_mask(i));
				cpumask_clear_cpu(i, cpu_core_mask(cpu));
			}
		}
		of_node_put(np);
	}
	of_node_put(l2_cache);
}

/* Activate a secondary processor. */
void start_secondary(void *unused)
{
	unsigned int cpu = smp_processor_id();
	int i, base;

	mmgrab(&init_mm);
	current->active_mm = &init_mm;

	smp_store_cpu_info(cpu);
	set_dec(tb_ticks_per_jiffy);
	preempt_disable();
	cpu_callin_map[cpu] = 1;

	if (smp_ops->setup_cpu)
		smp_ops->setup_cpu(cpu);
	if (smp_ops->take_timebase)
		smp_ops->take_timebase();

	secondary_cpu_time_init();

#ifdef CONFIG_PPC64
	if (system_state == SYSTEM_RUNNING)
		vdso_data->processorCount++;

	vdso_getcpu_init();
#endif
	/* Update sibling maps */
	base = cpu_first_thread_sibling(cpu);
	for (i = 0; i < threads_per_core; i++) {
		if (cpu_is_offline(base + i) && (cpu != base + i))
			continue;
		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));

		/* cpu_core_map should be a superset of
		 * cpu_sibling_map even if we don't have cache
		 * information, so update the former here, too.
		 */
		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
	}
	traverse_core_siblings(cpu, true);

	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

	smp_wmb();
	notify_cpu_starting(cpu);
	set_cpu_online(cpu, true);

	local_irq_enable();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

	BUG();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

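	/*
	 * SD_ASYM_PACKING tells the scheduler to pack work onto the
	 * lowest-numbered SMT threads first, which is preferred on cores
	 * with asymmetric thread performance.
	 */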
	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

void __init smp_cpus_done(unsigned int max_cpus)
{
	/*
	 * We are running pinned to the boot CPU, see rest_init().
	 */
	if (smp_ops && smp_ops->setup_cpu)
		smp_ops->setup_cpu(boot_cpuid);

	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

	dump_numa_cpu_topology();

	set_sched_topology(powerpc_topology);
}

#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	int base, i;
	int err;

	if (!smp_ops->cpu_disable)
		return -ENOSYS;

	err = smp_ops->cpu_disable();
	if (err)
		return err;

	/* Update sibling maps */
	base = cpu_first_thread_sibling(cpu);
	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
	}
	traverse_core_siblings(cpu, false);

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	if (smp_ops->cpu_die)
		smp_ops->cpu_die(cpu);
}

void cpu_die(void)
{
	if (ppc_md.cpu_die)
		ppc_md.cpu_die();

	/* If we return, we re-enter start_secondary */
	start_secondary_resume();
}

#endif